From 2be94ca479e1a46a7ee053f0f6e6d733093a463e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 13:14:52 +0200 Subject: [PATCH 01/84] feat: Send GenAI spans as V2 envelope items --- sentry_sdk/client.py | 105 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9f795d2489..ed58104ec7 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -27,6 +27,7 @@ get_before_send_metric, has_logs_enabled, has_metrics_enabled, + serialize_attribute, ) from sentry_sdk.serializer import serialize from sentry_sdk.tracing import trace @@ -56,6 +57,74 @@ ) from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor +from sentry_sdk.envelope import Item, PayloadRef + + +_ISO_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + + +def _iso_to_epoch(iso_str: str) -> float: + return ( + datetime.strptime(iso_str, _ISO_TIMESTAMP_FORMAT) + .replace(tzinfo=timezone.utc) + .timestamp() + ) + + +def _v1_span_to_v2(span: "Dict[str, Any]", event: "Dict[str, Any]") -> "Dict[str, Any]": + rv: "Dict[str, Any]" = { + "trace_id": span["trace_id"], + "span_id": span["span_id"], + "name": span.get("description") or "", + "is_segment": False, + "start_timestamp": _iso_to_epoch(span["start_timestamp"]), + "status": "ok", + } + + if span.get("timestamp"): + rv["end_timestamp"] = _iso_to_epoch(span["timestamp"]) + + if span.get("parent_span_id"): + rv["parent_span_id"] = span["parent_span_id"] + + status = span.get("status") + if status and status != "ok": + rv["status"] = "error" + + attributes: "Dict[str, Any]" = {} + + if span.get("op"): + attributes["sentry.op"] = span["op"] + if span.get("origin"): + attributes["sentry.origin"] = span["origin"] + + for key, value in (span.get("data") or {}).items(): + attributes[key] = value + for key, value in (span.get("tags") or {}).items(): + attributes[key] = value + + trace_context = event.get("contexts", 
{}).get("trace", {}) + sdk_info = event.get("sdk", {}) + + if event.get("release"): + attributes["sentry.release"] = event["release"] + if event.get("environment"): + attributes["sentry.environment"] = event["environment"] + if event.get("transaction"): + attributes["sentry.segment.name"] = event["transaction"] + + if trace_context.get("span_id"): + attributes["sentry.segment.id"] = trace_context["span_id"] + if sdk_info.get("name"): + attributes["sentry.sdk.name"] = sdk_info["name"] + if sdk_info.get("version"): + attributes["sentry.sdk.version"] = sdk_info["version"] + + if attributes: + rv["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} + + return rv + if TYPE_CHECKING: from typing import Any @@ -72,7 +141,7 @@ from sentry_sdk.session import Session from sentry_sdk.spotlight import SpotlightClient from sentry_sdk.traces import StreamedSpan - from sentry_sdk.transport import Transport, Item + from sentry_sdk.transport import Transport, Item, PayloadRef from sentry_sdk._log_batcher import LogBatcher from sentry_sdk._metrics_batcher import MetricsBatcher from sentry_sdk.utils import Dsn @@ -912,7 +981,39 @@ def capture_event( if is_transaction: if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - envelope.add_transaction(event_opt) + + nonstreamed_spans = [] + streamed_spans = [] + for span in event_opt.get("spans") or []: + span_op = span.get("op") + if span_op is not None and span_op.startswith("gen_ai."): + streamed_spans.append(span) + else: + nonstreamed_spans.append(span) + + if nonstreamed_spans: + event_opt["spans"] = nonstreamed_spans + envelope.add_transaction(event_opt) + + if streamed_spans: + envelope.add_item( + Item( + type=SpanBatcher.TYPE, + content_type=SpanBatcher.CONTENT_TYPE, + headers={ + "item_count": len(streamed_spans), + }, + payload=PayloadRef( + json={ + "items": [ + _v1_span_to_v2(span, event) + for span in streamed_spans + ] + }, + ), + ) + ) + elif is_checkin: 
envelope.add_checkin(event_opt) else: From 01f479a09e4791082da604ba0f57cc4b74f1bf2f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 15:42:59 +0200 Subject: [PATCH 02/84] . --- sentry_sdk/client.py | 213 ++++++++++++++++++++++++++----------------- 1 file changed, 130 insertions(+), 83 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index ed58104ec7..8667c2b194 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -2,7 +2,7 @@ import uuid import random import socket -from collections.abc import Mapping +from collections.abc import Mapping, Iterable from datetime import datetime, timezone from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload @@ -58,104 +58,156 @@ from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor from sentry_sdk.envelope import Item, PayloadRef +from sentry_sdk.utils import datetime_from_isoformat +if TYPE_CHECKING: + from typing import Any + from typing import Callable + from typing import Optional + from typing import Sequence + from typing import Type + from typing import Union + from typing import TypeVar + + from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory + from sentry_sdk.integrations import Integration + from sentry_sdk.scope import Scope + from sentry_sdk.session import Session + from sentry_sdk.spotlight import SpotlightClient + from sentry_sdk.traces import StreamedSpan + from sentry_sdk.transport import Transport, Item, PayloadRef + from sentry_sdk._log_batcher import LogBatcher + from sentry_sdk._metrics_batcher import MetricsBatcher + from sentry_sdk.utils import Dsn -_ISO_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + I = TypeVar("I", bound=Integration) # noqa: E741 +_client_init_debug = ContextVar("client_init_debug") -def _iso_to_epoch(iso_str: str) -> float: - return ( - datetime.strptime(iso_str, _ISO_TIMESTAMP_FORMAT) - .replace(tzinfo=timezone.utc) - .timestamp() 
- ) +SDK_INFO: "SDKInfo" = { + "name": "sentry.python", # SDK name will be overridden after integrations have been loaded with sentry_sdk.integrations.setup_integrations() + "version": VERSION, + "packages": [{"name": "pypi:sentry-sdk", "version": VERSION}], +} -def _v1_span_to_v2(span: "Dict[str, Any]", event: "Dict[str, Any]") -> "Dict[str, Any]": - rv: "Dict[str, Any]" = { - "trace_id": span["trace_id"], - "span_id": span["span_id"], - "name": span.get("description") or "", - "is_segment": False, - "start_timestamp": _iso_to_epoch(span["start_timestamp"]), +def _serialized_v1_span_to_serialized_v2_span( + span: "Dict[str, Any]", event: "Event" +) -> "dict[str, Any]": + # See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes". + res: "Dict[str, Any]" = { "status": "ok", + "is_segment": False, } - if span.get("timestamp"): - rv["end_timestamp"] = _iso_to_epoch(span["timestamp"]) + if "trace_id" in span: + res["trace_id"] = span["trace_id"] + + if "span_id" in span: + res["span_id"] = span["span_id"] + + if "description" in span: + res["name"] = span["description"] - if span.get("parent_span_id"): - rv["parent_span_id"] = span["parent_span_id"] + if "start_timestamp" in span: + start_timestamp = None + try: + start_timestamp = datetime_from_isoformat(span["start_timestamp"]) + except Exception: + pass + + if start_timestamp is not None: + res["start_timestamp"] = start_timestamp.timestamp() + + if "timestamp" in span: + end_timestamp = None + try: + end_timestamp = datetime_from_isoformat(span["timestamp"]) + except Exception: + pass - status = span.get("status") - if status and status != "ok": - rv["status"] = "error" + if end_timestamp is not None: + res["end_timestamp"] = end_timestamp.timestamp() + + if "parent_span_id" in span: + res["parent_span_id"] = span["parent_span_id"] + + if "status" in span and span["status"] != "ok": + res["status"] = "error" attributes: "Dict[str, Any]" = {} - if span.get("op"): + if "op" 
in span: attributes["sentry.op"] = span["op"] - if span.get("origin"): + if "origin" in span: attributes["sentry.origin"] = span["origin"] - for key, value in (span.get("data") or {}).items(): - attributes[key] = value - for key, value in (span.get("tags") or {}).items(): - attributes[key] = value - - trace_context = event.get("contexts", {}).get("trace", {}) - sdk_info = event.get("sdk", {}) - - if event.get("release"): + span_data = span.get("data") + if isinstance(span_data, dict): + attributes.update(span_data) + + span_tags = span.get("tags") + if isinstance(span_tags, dict): + attributes.update(span_tags) + + # See Scope._apply_user_attributes_to_telemetry() for user attributes. + user = event.get("user") + if isinstance(user, dict): + if "id" in user: + attributes["user.id"] = user["id"] + if "username" in user: + attributes["user.name"] = user["username"] + if "email" in user: + attributes["user.email"] = user["email"] + + # See Scope.set_global_attributes() for release, environment, and SDK metadata. 
+ if "release" in event: attributes["sentry.release"] = event["release"] - if event.get("environment"): + if "environment" in event: attributes["sentry.environment"] = event["environment"] - if event.get("transaction"): + if "transaction" in event: attributes["sentry.segment.name"] = event["transaction"] - if trace_context.get("span_id"): + trace_context = event.get("contexts", {}).get("trace", {}) + if "span_id" in trace_context: attributes["sentry.segment.id"] = trace_context["span_id"] - if sdk_info.get("name"): - attributes["sentry.sdk.name"] = sdk_info["name"] - if sdk_info.get("version"): - attributes["sentry.sdk.version"] = sdk_info["version"] + + sdk_info = event.get("sdk") + if isinstance(sdk_info, dict): + if "name" in sdk_info: + attributes["sentry.sdk.name"] = sdk_info["name"] + if "version" in sdk_info: + attributes["sentry.sdk.version"] = sdk_info["version"] if attributes: - rv["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} + res["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} - return rv + return res -if TYPE_CHECKING: - from typing import Any - from typing import Callable - from typing import Optional - from typing import Sequence - from typing import Type - from typing import Union - from typing import TypeVar +def _split_gen_ai_spans( + event_opt: "Event", +) -> "tuple[List[Dict[str, object]], List[Dict[str, object]]]": + if "spans" not in event_opt: + return [], [] - from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory - from sentry_sdk.integrations import Integration - from sentry_sdk.scope import Scope - from sentry_sdk.session import Session - from sentry_sdk.spotlight import SpotlightClient - from sentry_sdk.traces import StreamedSpan - from sentry_sdk.transport import Transport, Item, PayloadRef - from sentry_sdk._log_batcher import LogBatcher - from sentry_sdk._metrics_batcher import MetricsBatcher - from sentry_sdk.utils import Dsn + spans = 
event_opt["spans"] + if isinstance(spans, AnnotatedValue): + spans = spans.value - I = TypeVar("I", bound=Integration) # noqa: E741 - -_client_init_debug = ContextVar("client_init_debug") + if not isinstance(spans, Iterable): + return [], [] + non_gen_ai_spans = [] + gen_ai_spans = [] + for span in spans: + span_op = span.get("op") + if isinstance(span_op, str) and span_op.startswith("gen_ai."): + gen_ai_spans.append(span) + else: + non_gen_ai_spans.append(span) -SDK_INFO: "SDKInfo" = { - "name": "sentry.python", # SDK name will be overridden after integrations have been loaded with sentry_sdk.integrations.setup_integrations() - "version": VERSION, - "packages": [{"name": "pypi:sentry-sdk", "version": VERSION}], -} + return non_gen_ai_spans, gen_ai_spans def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]": @@ -982,32 +1034,27 @@ def capture_event( if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - nonstreamed_spans = [] - streamed_spans = [] - for span in event_opt.get("spans") or []: - span_op = span.get("op") - if span_op is not None and span_op.startswith("gen_ai."): - streamed_spans.append(span) - else: - nonstreamed_spans.append(span) + non_gen_ai_spans, gen_ai_spans = _split_gen_ai_spans(event_opt) - if nonstreamed_spans: - event_opt["spans"] = nonstreamed_spans - envelope.add_transaction(event_opt) + event_opt["spans"] = non_gen_ai_spans + envelope.add_transaction(event_opt) - if streamed_spans: + if gen_ai_spans: envelope.add_item( Item( type=SpanBatcher.TYPE, content_type=SpanBatcher.CONTENT_TYPE, headers={ - "item_count": len(streamed_spans), + "item_count": len(gen_ai_spans), }, payload=PayloadRef( json={ "items": [ - _v1_span_to_v2(span, event) - for span in streamed_spans + _serialized_v1_span_to_serialized_v2_span( + span, event + ) + for span in gen_ai_spans + if isinstance(span, dict) ] }, ), From 80e6a106b8472f6a6984ab254ca56646f0d51e59 Mon Sep 17 00:00:00 2001 From: Alexander 
Alderman Webb Date: Wed, 15 Apr 2026 15:43:59 +0200 Subject: [PATCH 03/84] . --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 8667c2b194..41ab81c58e 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -75,7 +75,7 @@ from sentry_sdk.session import Session from sentry_sdk.spotlight import SpotlightClient from sentry_sdk.traces import StreamedSpan - from sentry_sdk.transport import Transport, Item, PayloadRef + from sentry_sdk.transport import Transport, Item from sentry_sdk._log_batcher import LogBatcher from sentry_sdk._metrics_batcher import MetricsBatcher from sentry_sdk.utils import Dsn From 0622cf410d9c6496d81d50ce163f52fa1d97eaee Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 15:44:35 +0200 Subject: [PATCH 04/84] . --- sentry_sdk/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 41ab81c58e..2895f23436 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -84,6 +84,7 @@ _client_init_debug = ContextVar("client_init_debug") + SDK_INFO: "SDKInfo" = { "name": "sentry.python", # SDK name will be overridden after integrations have been loaded with sentry_sdk.integrations.setup_integrations() "version": VERSION, From 7c75da105649abe57a6e32946507d97c85c86123 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 16:01:06 +0200 Subject: [PATCH 05/84] . 
--- sentry_sdk/client.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 2895f23436..7bb2acf7dc 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -188,16 +188,16 @@ def _serialized_v1_span_to_serialized_v2_span( def _split_gen_ai_spans( event_opt: "Event", -) -> "tuple[List[Dict[str, object]], List[Dict[str, object]]]": +) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]": if "spans" not in event_opt: - return [], [] + return None spans = event_opt["spans"] if isinstance(spans, AnnotatedValue): spans = spans.value if not isinstance(spans, Iterable): - return [], [] + return None non_gen_ai_spans = [] gen_ai_spans = [] @@ -1035,12 +1035,15 @@ def capture_event( if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - non_gen_ai_spans, gen_ai_spans = _split_gen_ai_spans(event_opt) + split_spans = _split_gen_ai_spans(event_opt) + if split_spans is None or not split_spans[1]: + envelope.add_transaction(event_opt) + else: + non_gen_ai_spans, gen_ai_spans = split_spans - event_opt["spans"] = non_gen_ai_spans - envelope.add_transaction(event_opt) + event_opt["spans"] = non_gen_ai_spans + envelope.add_transaction(event_opt) - if gen_ai_spans: envelope.add_item( Item( type=SpanBatcher.TYPE, From 54a9b073a5887cdc51bd2d23253014e1bcb55c0f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 16:08:42 +0200 Subject: [PATCH 06/84] update --- sentry_sdk/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7bb2acf7dc..9ee225150d 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -93,10 +93,10 @@ def _serialized_v1_span_to_serialized_v2_span( - span: "Dict[str, Any]", event: "Event" + span: "dict[str, Any]", event: "Event" ) -> "dict[str, Any]": # See SpanBatcher._to_transport_format() for analogous population 
of all entries except "attributes". - res: "Dict[str, Any]" = { + res: "dict[str, Any]" = { "status": "ok", "is_segment": False, } From d1aa07cb2c201ab69a130e9b1b3705f2330d629b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 16:48:38 +0200 Subject: [PATCH 07/84] . --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9ee225150d..e02841d5a3 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -192,7 +192,7 @@ def _split_gen_ai_spans( if "spans" not in event_opt: return None - spans = event_opt["spans"] + spans: "Any" = event_opt["spans"] if isinstance(spans, AnnotatedValue): spans = spans.value From 117a6c9bf47342883a8cd4546582be97d39ad996 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 18:17:04 +0200 Subject: [PATCH 08/84] . --- sentry_sdk/client.py | 62 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e02841d5a3..7c1eb64cff 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -7,6 +7,7 @@ from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload import warnings +import json from sentry_sdk._compat import check_uwsgi_thread_support from sentry_sdk._metrics_batcher import MetricsBatcher @@ -27,10 +28,10 @@ get_before_send_metric, has_logs_enabled, has_metrics_enabled, - serialize_attribute, ) from sentry_sdk.serializer import serialize from sentry_sdk.tracing import trace +from sentry_sdk.traces import SpanStatus from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.transport import ( HttpTransportCore, @@ -39,6 +40,7 @@ ) from sentry_sdk.consts import ( SPANDATA, + SPANSTATUS, DEFAULT_MAX_VALUE_LENGTH, DEFAULT_OPTIONS, INSTRUMENTER, @@ -97,7 +99,7 @@ def _serialized_v1_span_to_serialized_v2_span( ) -> "dict[str, Any]": # See 
SpanBatcher._to_transport_format() for analogous population of all entries except "attributes". res: "dict[str, Any]" = { - "status": "ok", + "status": SpanStatus.OK.value, "is_segment": False, } @@ -133,7 +135,7 @@ def _serialized_v1_span_to_serialized_v2_span( if "parent_span_id" in span: res["parent_span_id"] = span["parent_span_id"] - if "status" in span and span["status"] != "ok": + if "status" in span and span["status"] != SPANSTATUS.OK: res["status"] = "error" attributes: "Dict[str, Any]" = {} @@ -180,8 +182,58 @@ def _serialized_v1_span_to_serialized_v2_span( if "version" in sdk_info: attributes["sentry.sdk.version"] = sdk_info["version"] - if attributes: - res["attributes"] = {k: serialize_attribute(v) for k, v in attributes.items()} + for key, value in attributes.items(): + serialized_value = serialize(value) + if isinstance(serialized_value, bool): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "boolean", + } + continue + + if isinstance(serialized_value, int): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "integer", + } + continue + + if isinstance(serialized_value, float): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "double", + } + continue + + if isinstance(serialized_value, str): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "string", + } + continue + + if isinstance(serialized_value, list): + if not serialized_value: + res.setdefault("attributes", {})[key] = {"value": [], "type": "array"} + + ty = type(serialized_value[0]) + if ty in (int, str, bool, float) and all( + type(v) is ty for v in serialized_value + ): + res.setdefault("attributes", {})[key] = { + "value": serialized_value, + "type": "array", + } + + continue + + # Types returned when the serializer for V1 span attributes recurses into some container types. 
+ if isinstance(serialized_value, (dict, list)): + res.setdefault("attributes", {})[key] = { + "value": json.dumps(serialized_value), + "type": "string", + } + continue return res From 83c36b54c0c46847531db66f2ddc3d6d592d8a95 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 15 Apr 2026 18:25:21 +0200 Subject: [PATCH 09/84] . --- sentry_sdk/client.py | 118 ++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7c1eb64cff..c6df2f564b 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -71,7 +71,15 @@ from typing import Union from typing import TypeVar - from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory + from sentry_sdk._types import ( + Event, + Hint, + SDKInfo, + Log, + Metric, + EventDataCategory, + SerializedAttributeValue, + ) from sentry_sdk.integrations import Integration from sentry_sdk.scope import Scope from sentry_sdk.session import Session @@ -94,6 +102,56 @@ } +def _serialized_v1_attribute_to_serialized_v2_attribute( + attribute_value: "Any", +) -> "Optional[SerializedAttributeValue]": + if isinstance(attribute_value, bool): + return { + "value": attribute_value, + "type": "boolean", + } + + if isinstance(attribute_value, int): + return { + "value": attribute_value, + "type": "integer", + } + + if isinstance(attribute_value, float): + return { + "value": attribute_value, + "type": "double", + } + + if isinstance(attribute_value, str): + return { + "value": attribute_value, + "type": "string", + } + + if isinstance(attribute_value, list): + if not attribute_value: + return {"value": [], "type": "array"} + + ty = type(attribute_value[0]) + if ty in (int, str, bool, float) and all( + type(v) is ty for v in attribute_value + ): + return { + "value": attribute_value, + "type": "array", + } + + # Types returned when the serializer for V1 span attributes recurses into some container types. 
+ if isinstance(attribute_value, (dict, list)): + return { + "value": json.dumps(attribute_value), + "type": "string", + } + + return None + + def _serialized_v1_span_to_serialized_v2_span( span: "dict[str, Any]", event: "Event" ) -> "dict[str, Any]": @@ -182,58 +240,14 @@ def _serialized_v1_span_to_serialized_v2_span( if "version" in sdk_info: attributes["sentry.sdk.version"] = sdk_info["version"] - for key, value in attributes.items(): - serialized_value = serialize(value) - if isinstance(serialized_value, bool): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "boolean", - } - continue - - if isinstance(serialized_value, int): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "integer", - } - continue - - if isinstance(serialized_value, float): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "double", - } - continue - - if isinstance(serialized_value, str): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "string", - } - continue - - if isinstance(serialized_value, list): - if not serialized_value: - res.setdefault("attributes", {})[key] = {"value": [], "type": "array"} - - ty = type(serialized_value[0]) - if ty in (int, str, bool, float) and all( - type(v) is ty for v in serialized_value - ): - res.setdefault("attributes", {})[key] = { - "value": serialized_value, - "type": "array", - } - - continue + if not attributes: + return res - # Types returned when the serializer for V1 span attributes recurses into some container types. 
- if isinstance(serialized_value, (dict, list)): - res.setdefault("attributes", {})[key] = { - "value": json.dumps(serialized_value), - "type": "string", - } - continue + res["attributes"] = {} + for key, value in attributes.items(): + res["attributes"][key] = _serialized_v1_attribute_to_serialized_v2_attribute( + value + ) return res From f71e0ce84e3eacdbd46e0509f4f608c919778542 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 16 Apr 2026 10:50:59 +0200 Subject: [PATCH 10/84] openai tests --- tests/integrations/openai/test_openai.py | 891 ++++++++++++----------- 1 file changed, 450 insertions(+), 441 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index ada2e633de..e53f8e4f55 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -132,14 +132,14 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_chat_completion_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -163,27 +163,26 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert 
span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -229,13 +228,13 @@ def test_nonstreaming_chat_completion_no_prompts( ), ], ) -def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): +def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -256,30 +255,29 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] 
else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -290,12 +288,12 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -308,14 +306,14 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ], ) async def test_nonstreaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -336,27 +334,26 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -404,14 +401,14 @@ async def 
test_nonstreaming_chat_completion_async_no_prompts( ], ) async def test_nonstreaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, capture_items, messages, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -429,30 +426,29 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -463,12 +459,12 @@ async def test_nonstreaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 def tiktoken_encoding_if_installed(): @@ -491,7 +487,7 @@ def tiktoken_encoding_if_installed(): ) def test_streaming_chat_completion_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -507,7 +503,7 @@ def test_streaming_chat_completion_no_prompts( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -581,32 +577,31 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - 
assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - 
assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -617,7 +612,7 @@ def test_streaming_chat_completion_no_prompts( ) def test_streaming_chat_completion_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -627,7 +622,7 @@ def test_streaming_chat_completion_with_usage_in_stream( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -684,13 +679,11 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -699,7 +692,7 @@ def test_streaming_chat_completion_with_usage_in_stream( ) def test_streaming_chat_completion_empty_content_preserves_token_usage( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -709,7 +702,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = 
capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -747,13 +740,11 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -763,7 +754,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( @pytest.mark.asyncio async def test_streaming_chat_completion_empty_content_preserves_token_usage_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -774,7 +765,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -814,13 +805,11 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -830,7 +819,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy @pytest.mark.asyncio async def test_streaming_chat_completion_async_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -841,7 +830,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -900,13 +889,11 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 # noinspection PyTypeChecker @@ -955,7 +942,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) def test_streaming_chat_completion( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -971,7 +958,7 @@ def test_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -1041,30 
+1028,29 @@ def test_streaming_chat_completion( map(lambda x: x.choices[0].delta.content, response_stream) ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful 
assistant.", @@ -1075,22 +1061,22 @@ def test_streaming_chat_completion( }, ] - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1107,7 +1093,7 @@ def test_streaming_chat_completion( ) async def test_streaming_chat_completion_async_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1124,7 +1110,7 @@ async def test_streaming_chat_completion_async_no_prompts( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -1201,32 +1187,31 @@ async def 
test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT 
not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1279,7 +1264,7 @@ async def test_streaming_chat_completion_async_no_prompts( ) async def test_streaming_chat_completion_async( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -1296,7 +1281,7 @@ async def test_streaming_chat_completion_async( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1371,32 +1356,31 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == 
"gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -1407,28 +1391,28 @@ async def test_streaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 
else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly -def test_bad_chat_completion(sentry_init, capture_events): +def test_bad_chat_completion(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -1440,13 +1424,13 @@ def test_bad_chat_completion(sentry_init, capture_events): messages=[{"role": "system", "content": "hello"}], ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") with start_transaction(name="test"): client = OpenAI(api_key="z") @@ -1458,17 +1442,20 @@ def test_span_status_error(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (error, transaction) = events - assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = 
(item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_events): +async def test_bad_chat_completion_async(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -1479,7 +1466,7 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1492,14 +1479,14 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): ], ) def test_embeddings_create_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1521,17 +1508,15 @@ def test_embeddings_create_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in 
span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1577,13 +1562,13 @@ def test_embeddings_create_no_pii( ), ], ) -def test_embeddings_create(sentry_init, capture_events, input, request): +def test_embeddings_create(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1603,24 +1588,24 @@ def test_embeddings_create(sentry_init, capture_events, input, request): assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == "tokens" or param_id == 
"token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1628,13 +1613,13 @@ def test_embeddings_create(sentry_init, capture_events, input, request): 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1647,14 +1632,14 @@ def test_embeddings_create(sentry_init, capture_events, input, request): ], ) async def test_embeddings_create_async_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1676,17 +1661,15 @@ async def test_embeddings_create_async_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert 
SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1733,13 +1716,13 @@ async def test_embeddings_create_async_no_pii( ), ], ) -async def test_embeddings_create_async(sentry_init, capture_events, input, request): +async def test_embeddings_create_async(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1761,24 +1744,24 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == 
"tokens" or param_id == "token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1786,13 +1769,13 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1800,14 +1783,14 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque [(True, True), (True, False), (False, True), (False, False)], ) def test_embeddings_create_raises_error( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("event") client = OpenAI(api_key="z") @@ -1818,7 +1801,7 @@ def test_embeddings_create_raises_error( with pytest.raises(OpenAIError): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1828,14 +1811,14 @@ def test_embeddings_create_raises_error( [(True, True), (True, False), (False, True), (False, False)], ) async def test_embeddings_create_raises_error_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( 
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1846,16 +1829,16 @@ async def test_embeddings_create_raises_error_async( with pytest.raises(OpenAIError): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_origin_nonstreaming_chat(sentry_init, capture_events): +def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1865,19 +1848,20 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): +async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1887,18 +1871,19 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): 
model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_streaming_chat(sentry_init, capture_events): +def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -1946,21 +1931,22 @@ def test_span_origin_streaming_chat(sentry_init, capture_events): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["origin"] == "manual" - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( - sentry_init, capture_events, async_iterator + sentry_init, capture_items, async_iterator ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -2014,18 +2000,19 @@ async def test_span_origin_streaming_chat_async( # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events - + (event,) = (item.payload for item 
in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_embeddings(sentry_init, capture_events): + +def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") @@ -2043,19 +2030,20 @@ def test_span_origin_embeddings(sentry_init, capture_events): with start_transaction(name="openai tx"): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_embeddings_async(sentry_init, capture_events): +async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") @@ -2073,10 +2061,11 @@ async def test_span_origin_embeddings_async(sentry_init, capture_events): with start_transaction(name="openai tx"): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + 
assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" def test_completions_token_usage_from_response(): @@ -2442,12 +2431,12 @@ def count_tokens(msg): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): +def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2462,13 +2451,10 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + assert spans[0]["attributes"] == { "gen_ai.operation.name": "responses", "gen_ai.request.max_tokens": 100, "gen_ai.request.temperature": 0.7, @@ -2482,13 +2468,21 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.system_instructions" not in spans[0]["data"] - assert "gen_ai.request.messages" not in spans[0]["data"] - assert "gen_ai.response.text" not in spans[0]["data"] + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in 
spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] @pytest.mark.parametrize( @@ -2557,14 +2551,14 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2579,12 +2573,9 @@ def test_ai_client_span_responses_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2601,6 +2592,14 @@ def test_ai_client_span_responses_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2759,17 +2758,17 @@ def test_ai_client_span_responses_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_events): +def test_error_in_responses_api(sentry_init, capture_items): sentry_init( 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = OpenAI(api_key="z") client.responses._post = mock.Mock( @@ -2784,15 +2783,17 @@ def test_error_in_responses_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -2866,14 +2867,14 @@ def test_error_in_responses_api(sentry_init, capture_events): ], ) async def test_ai_client_span_responses_async_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) @@ -2888,12 +2889,9 @@ async def test_ai_client_span_responses_async_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert 
spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2911,6 +2909,14 @@ async def test_ai_client_span_responses_async_api( "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3069,7 +3075,7 @@ async def test_ai_client_span_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @@ -3140,7 +3146,7 @@ async def test_ai_client_span_responses_async_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( sentry_init, - capture_events, + capture_items, instructions, input, request, @@ -3153,7 +3159,7 @@ async def test_ai_client_span_streaming_responses_async_api( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3178,11 +3184,12 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass - (transaction,) = events - spans = [span for span in transaction["spans"] if span["op"] == OP.GEN_AI_RESPONSES] + spans = [item.payload for item in items if item.type == "span"] + spans = [ + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + ] assert len(spans) == 1 - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -3200,6 +3207,14 @@ async def test_ai_client_span_streaming_responses_async_api( 
"gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "hello world", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3358,18 +3373,18 @@ async def test_ai_client_span_streaming_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_error_in_responses_async_api(sentry_init, capture_events): +async def test_error_in_responses_async_api(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock( @@ -3384,15 +3399,17 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( 
error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -3479,7 +3496,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3494,7 +3511,7 @@ def test_streaming_responses_api( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3525,26 +3542,25 @@ def test_streaming_responses_api( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello 
world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -3555,7 +3571,7 @@ def test_streaming_responses_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3571,7 +3587,7 @@ async def test_streaming_responses_api_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3600,26 +3616,25 @@ async def test_streaming_responses_api_async( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + 
assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -3630,12 +3645,12 @@ async def test_streaming_responses_api_async( "tools", [[], None, NOT_GIVEN, omit], ) -def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): +def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3647,10 +3662,9 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): tools=tools, ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") - assert "gen_ai.request.available_tools" not in 
span["data"] + assert "gen_ai.request.available_tools" not in span["attributes"] # Test messages with mixed roles including "ai" that should be mapped to "assistant" @@ -3669,7 +3683,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): ], ) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3678,7 +3692,7 @@ def test_openai_message_role_mapping( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3688,28 +3702,27 @@ def test_openai_message_role_mapping( with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) # Verify that the span was created correctly - (event,) = events - span = event["spans"][0] - assert span["op"] == "gen_ai.chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_events): +def test_openai_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + 
items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3730,17 +3743,17 @@ def test_openai_message_truncation(sentry_init, capture_events): messages=large_messages, ) - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) + (event,) = (item.payload for item in items if item.type == "transaction") meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -3749,7 +3762,7 @@ def test_openai_message_truncation(sentry_init, capture_events): # noinspection PyTypeChecker def test_streaming_chat_completion_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming chat completions capture time-to-first-token (TTFT). 
@@ -3758,7 +3771,7 @@ def test_streaming_chat_completion_ttft( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3810,13 +3823,12 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3825,7 +3837,7 @@ def test_streaming_chat_completion_ttft( @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3837,7 +3849,7 @@ async def test_streaming_chat_completion_ttft_async( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3891,13 +3903,12 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3905,7 +3916,7 @@ async def test_streaming_chat_completion_ttft_async( # noinspection PyTypeChecker @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming responses API captures time-to-first-token (TTFT). @@ -3914,7 +3925,7 @@ def test_streaming_responses_api_ttft( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3936,13 +3947,12 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3952,7 +3962,7 @@ def test_streaming_responses_api_ttft( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3964,7 +3974,7 @@ async def test_streaming_responses_api_ttft_async( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = 
AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3986,12 +3996,11 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 From 1fab6321ef8a6eb80ecc8fc44c2c733c959a62b4 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 16 Apr 2026 11:43:47 +0200 Subject: [PATCH 11/84] anthropic tests --- .../integrations/anthropic/test_anthropic.py | 1478 +++++++++-------- 1 file changed, 747 insertions(+), 731 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e86f7e1fa9..c7fc280b6c 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -91,14 +91,14 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_create_message( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -120,37 +120,38 @@ def test_nonstreaming_create_message( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert 
len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -164,14 +165,14 @@ def test_nonstreaming_create_message( ], ) async def test_nonstreaming_create_message_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -193,36 +194,37 @@ async def test_nonstreaming_create_message_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for 
item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.parametrize( @@ -236,7 +238,7 @@ async def test_nonstreaming_create_message_async( ) def test_streaming_create_message( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -286,7 +288,7 @@ def test_streaming_create_message( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -308,42 +310,45 @@ def test_streaming_create_message( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT 
- assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_streaming_create_message_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -391,7 +396,7 @@ def test_streaming_create_message_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -415,31 +420,34 @@ def test_streaming_create_message_close( messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -448,7 +456,7 @@ def test_streaming_create_message_close( ) def test_streaming_create_message_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -491,7 +499,7 @@ def test_streaming_create_message_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -513,34 +521,36 @@ def test_streaming_create_message_api_error( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -555,7 +565,7 @@ def test_streaming_create_message_api_error( ) def test_stream_messages( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -605,7 +615,7 @@ def test_stream_messages( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -628,42 +638,45 @@ def test_stream_messages( for event in stream: pass - assert 
len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_stream_messages_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -711,7 +724,7 @@ def test_stream_messages_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -740,31 +753,34 @@ def test_stream_messages_close( stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - 
assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -773,7 +789,7 @@ def test_stream_messages_close( ) def test_stream_messages_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -816,7 +832,7 @@ def test_stream_messages_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ 
{ @@ -839,34 +855,36 @@ def test_stream_messages_api_error( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -882,7 +900,7 @@ def test_stream_messages_api_error( ) async def test_streaming_create_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -936,7 +954,7 @@ async def test_streaming_create_message_async( default_integrations=False, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -958,44 +976,45 @@ async def test_streaming_create_message_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio async def 
test_streaming_create_message_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1046,7 +1065,7 @@ async def test_streaming_create_message_async_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1069,31 +1088,34 @@ async def test_streaming_create_message_async_close( await messages.__anext__() await messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1103,7 +1125,7 @@ async def test_streaming_create_message_async_close( @pytest.mark.asyncio async def test_streaming_create_message_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1149,7 +1171,7 @@ async def test_streaming_create_message_async_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1171,34 +1193,36 @@ async def test_streaming_create_message_async_api_error( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - 
assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -1214,7 +1238,7 @@ async def test_streaming_create_message_async_api_error( ) async def test_stream_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1267,7 +1291,7 @@ async def test_stream_message_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", 
"span") messages = [ { @@ -1290,37 +1314,38 @@ async def test_stream_message_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1330,7 +1355,7 @@ async def test_stream_message_async( @pytest.mark.asyncio async def test_stream_messages_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1376,7 +1401,7 @@ async def test_stream_messages_async_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1399,41 +1424,43 @@ async def test_stream_messages_async_api_error( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio async def test_stream_messages_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ 
-1484,7 +1511,7 @@ async def test_stream_messages_async_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1515,31 +1542,34 @@ async def test_stream_messages_async_close( await stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1557,7 +1587,7 @@ async def test_stream_messages_async_close( ) def test_streaming_create_message_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1637,7 +1667,7 @@ def test_streaming_create_message_with_input_json_delta( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1659,38 +1689,36 @@ def test_streaming_create_message_with_input_json_delta( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.skipif( @@ -1708,7 +1736,7 @@ def test_streaming_create_message_with_input_json_delta( ) def test_stream_messages_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1788,7 +1816,7 @@ def test_stream_messages_with_input_json_delta( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1811,38 +1839,36 @@ def 
test_stream_messages_with_input_json_delta( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 
+ assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -1861,7 +1887,7 @@ def test_stream_messages_with_input_json_delta( ) async def test_streaming_create_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1947,7 +1973,7 @@ async def test_streaming_create_message_with_input_json_delta_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1969,39 +1995,37 @@ async def test_streaming_create_message_with_input_json_delta_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the 
weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2020,7 +2044,7 @@ async def test_streaming_create_message_with_input_json_delta_async( ) async def test_stream_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2106,7 +2130,7 @@ async def test_stream_message_with_input_json_delta_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2129,44 +2153,42 @@ async def test_stream_message_with_input_json_delta_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert 
span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_exception_message_create(sentry_init, capture_events): +def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = 
capture_items("event", "transaction") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -2179,14 +2201,16 @@ def test_exception_message_create(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = Anthropic(api_key="z") @@ -2200,18 +2224,19 @@ def test_span_status_error(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_status_error_async(sentry_init, capture_events): +async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = 
AsyncAnthropic(api_key="z") @@ -2225,18 +2250,19 @@ async def test_span_status_error_async(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_exception_message_create_async(sentry_init, capture_events): +async def test_exception_message_create_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock( @@ -2249,17 +2275,19 @@ async def test_exception_message_create_async(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2274,21 
+2302,22 @@ def test_span_origin(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_origin_async(sentry_init, capture_events): +async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2303,12 +2332,13 @@ async def test_span_origin_async(sentry_init, capture_events): with start_transaction(name="anthropic"): await client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert 
spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.skipif( @@ -2392,7 +2422,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): ], ) def test_anthropic_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -2400,7 +2430,7 @@ def test_anthropic_message_role_mapping( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2425,29 +2455,28 @@ def mock_messages_create(*args, **kwargs): model="claude-3-opus", max_tokens=10, messages=test_messages ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created correctly - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert stored_messages[0]["role"] == expected_role -def test_anthropic_message_truncation(sentry_init, capture_events): +def test_anthropic_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2466,21 +2495,18 @@ def test_anthropic_message_truncation(sentry_init, capture_events): with start_transaction(): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2488,18 +2514,19 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @pytest.mark.asyncio -async def test_anthropic_message_truncation_async(sentry_init, capture_events): +async def test_anthropic_message_truncation_async(sentry_init, 
capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2518,21 +2545,18 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): with start_transaction(): await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2540,6 +2564,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -2553,7 +2578,7 @@ async 
def test_anthropic_message_truncation_async(sentry_init, capture_events): ], ) def test_nonstreaming_create_message_with_system_prompt( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" sentry_init( @@ -2561,7 +2586,7 @@ def test_nonstreaming_create_message_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2586,46 +2611,46 @@ def test_nonstreaming_create_message_with_system_prompt( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - 
span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -2639,7 +2664,7 @@ def test_nonstreaming_create_message_with_system_prompt( ], ) async 
def test_nonstreaming_create_message_with_system_prompt_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" sentry_init( @@ -2647,7 +2672,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2672,46 +2697,46 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.parametrize( @@ -2725,7 +2750,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( ) def 
test_streaming_create_message_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2776,7 +2801,7 @@ def test_streaming_create_message_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2802,46 +2827,46 @@ def test_streaming_create_message_with_system_prompt( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.parametrize( @@ -2855,7 +2880,7 @@ def test_streaming_create_message_with_system_prompt( ) def test_stream_messages_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2906,7 +2931,7 @@ def test_stream_messages_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2930,46 +2955,46 @@ def test_stream_messages_with_system_prompt( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert 
event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2984,7 +3009,7 @@ def test_stream_messages_with_system_prompt( ) async def test_stream_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3038,7 +3063,7 @@ async def test_stream_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3062,46 +3087,46 @@ async def test_stream_message_with_system_prompt_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -3116,7 +3141,7 @@ async def test_stream_message_with_system_prompt_async( ) async def test_streaming_create_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3170,7 +3195,7 @@ async def test_streaming_create_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3196,56 +3221,56 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert 
span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_system_prompt_with_complex_structure(sentry_init, capture_events): +def test_system_prompt_with_complex_structure(sentry_init, capture_items): """Test that complex system prompt structures (list of text blocks) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3268,17 +3293,18 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): ) assert response == EXAMPLE_MESSAGE - assert len(events) == 1 - (event,) = events - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] - system_instructions = json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) # System content should be a list of text blocks assert isinstance(system_instructions, list) @@ -3287,8 +3313,8 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): {"type": "text", "content": "Be concise and clear."}, ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3490,14 +3516,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -def test_message_with_base64_image(sentry_init, capture_events): +def test_message_with_base64_image(sentry_init, capture_items): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3521,12 +3547,11 @@ def test_message_with_base64_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type 
== "span"] + (span,) = spans - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3541,14 +3566,14 @@ def test_message_with_base64_image(sentry_init, capture_events): } -def test_message_with_url_image(sentry_init, capture_events): +def test_message_with_url_image(sentry_init, capture_items): """Test that messages with URL-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3571,11 +3596,10 @@ def test_message_with_url_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3585,14 +3609,14 @@ def test_message_with_url_image(sentry_init, capture_events): } -def test_message_with_file_image(sentry_init, capture_events): +def test_message_with_file_image(sentry_init, capture_items): """Test that messages with file_id-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events 
= capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3616,11 +3640,10 @@ def test_message_with_file_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3630,14 +3653,14 @@ def test_message_with_file_image(sentry_init, capture_events): } -def test_message_with_base64_pdf(sentry_init, capture_events): +def test_message_with_base64_pdf(sentry_init, capture_items): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3651,7 +3674,7 @@ def test_message_with_base64_pdf(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQKJeLj...base64pdfdata", + "attributes": "JVBERi0xLjQKJeLj...base64pdfdata", }, }, ], @@ -3661,11 +3684,10 @@ def test_message_with_base64_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -3675,14 +3697,14 @@ def test_message_with_base64_pdf(sentry_init, capture_events): } -def test_message_with_url_pdf(sentry_init, capture_events): +def test_message_with_url_pdf(sentry_init, capture_items): """Test that messages with URL-referenced PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3705,11 +3727,10 @@ def test_message_with_url_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3719,14 +3740,14 @@ def test_message_with_url_pdf(sentry_init, capture_events): } -def test_message_with_file_document(sentry_init, capture_events): +def test_message_with_file_document(sentry_init, capture_items): """Test that messages with file_id-referenced documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) 
@@ -3750,11 +3771,10 @@ def test_message_with_file_document(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3764,14 +3784,14 @@ def test_message_with_file_document(sentry_init, capture_events): } -def test_message_with_mixed_content(sentry_init, capture_events): +def test_message_with_mixed_content(sentry_init, capture_items): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3785,7 +3805,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "image/png", - "data": "iVBORw0KGgo...base64imagedata", + "attributes": "iVBORw0KGgo...base64imagedata", }, }, { @@ -3800,7 +3820,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQK...base64pdfdata", + "attributes": "JVBERi0xLjQK...base64pdfdata", }, }, {"type": "text", "text": "Please provide a detailed analysis."}, @@ -3811,11 +3831,10 @@ def test_message_with_mixed_content(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) 
== 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -3847,14 +3866,14 @@ def test_message_with_mixed_content(sentry_init, capture_events): } -def test_message_with_multiple_images_different_formats(sentry_init, capture_events): +def test_message_with_multiple_images_different_formats(sentry_init, capture_items): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3867,7 +3886,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64data1...", + "attributes": "base64data1...", }, }, { @@ -3893,11 +3912,10 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 @@ -3922,14 +3940,14 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve assert content[3] == {"type": "text", "text": 
"Compare these three images."} -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): """Test that binary content is not stored when send_default_pii is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3943,7 +3961,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "attributes": "base64encodeddatahere...", }, }, ], @@ -3953,22 +3971,21 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): """Test that binary content is not stored when include_prompts is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3982,7 +3999,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, 
capture_ev "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "attributes": "base64encodeddatahere...", }, }, ], @@ -3992,18 +4009,17 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_ev with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_cache_tokens_nonstreaming(sentry_init, capture_events): +def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4029,16 +4045,16 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): model="claude-3-5-sonnet-20241022", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). @@ -4051,7 +4067,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4077,16 +4093,16 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 -def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). 
@@ -4099,7 +4115,7 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4125,18 +4141,18 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4176,7 +4192,7 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4192,18 +4208,18 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 
test_stream_messages_input_tokens_include_cache_read_streaming - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4242,7 +4258,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4258,16 +4274,16 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 -def 
test_input_tokens_unchanged_without_caching(sentry_init, capture_events): +def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): """ Test that input_tokens is unchanged when there are no cached tokens. @@ -4275,7 +4291,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4299,15 +4315,15 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 def test_cache_tokens_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4343,7 +4359,7 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4359,17 +4375,17 @@ def test_cache_tokens_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - 
assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 def test_stream_messages_cache_tokens( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") @@ -4403,7 +4419,7 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4419,10 +4435,10 @@ def test_stream_messages_cache_tokens( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 From 
f44316dfa45f83d02e7f65908340aeeadcfbe70f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 16 Apr 2026 15:24:52 +0200 Subject: [PATCH 12/84] google-genai tests --- .../google_genai/test_google_genai.py | 507 +++++++++--------- 1 file changed, 248 insertions(+), 259 deletions(-) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 6e91ba6634..e074b79c8c 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -124,14 +124,14 @@ def create_test_config( ], ) def test_nonstreaming_generate_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -146,38 +146,37 @@ def test_nonstreaming_generate_content( mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Tell me a joke", config=config ) - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai" - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check chat span - assert chat_span["op"] == OP.GEN_AI_CHAT - assert chat_span["description"] == "chat gemini-1.5-flash" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert 
chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert chat_span["name"] == "chat gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" if send_default_pii and include_prompts: # Response text is stored as a JSON array - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Parse the JSON array response_texts = json.loads(response_text) assert response_texts == ["Hello! How can I help you today?"] else: - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] # Check token usage - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 @pytest.mark.parametrize("generate_content_config", (False, True)) @@ -210,7 +209,7 @@ def test_nonstreaming_generate_content( ) def 
test_generate_content_with_system_instruction( sentry_init, - capture_events, + capture_items, mock_genai_client, generate_content_config, system_instructions, @@ -221,7 +220,7 @@ def test_generate_content_with_system_instruction( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -243,16 +242,15 @@ def test_generate_content_with_system_instruction( config=config, ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") if expected_texts is None: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] return # (PII is enabled and include_prompts is True in this test) system_instructions = json.loads( - invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ @@ -260,12 +258,12 @@ def test_generate_content_with_system_instruction( ] -def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client): +def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -319,18 +317,17 @@ def get_weather(location: str) -> str: model="gemini-1.5-flash", contents="What's the weather?", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check that tools are recorded (data is serialized as a string) - tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data_str = 
invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 # The order of tools may not be guaranteed, so sort by name and description for comparison sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) ) # The function tool @@ -342,13 +339,13 @@ def get_weather(location: str) -> str: assert sorted_tools[1]["description"] == "Get weather information (tool object)" -def test_tool_execution(sentry_init, capture_events): +def test_tool_execution(sentry_init, capture_items): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -366,25 +363,25 @@ def get_weather(location: str) -> str: assert result == "The weather in San Francisco is sunny" - (event,) = events - assert len(event["spans"]) == 1 - tool_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + tool_span = next(item.payload for item in items if item.type == "span") - assert tool_span["op"] == OP.GEN_AI_EXECUTE_TOOL - assert tool_span["description"] == "execute_tool get_weather" - assert tool_span["data"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["name"] == "execute_tool get_weather" + assert tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" assert ( - tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] == "Get the weather for a location" ) -def test_error_handling(sentry_init, capture_events, mock_genai_client): +def test_error_handling(sentry_init, 
capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction") # Mock an error at the HTTP level with mock.patch.object( @@ -399,8 +396,8 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): ) # Should have both transaction and error events - assert len(events) == 2 - error_event, transaction_event = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -408,14 +405,14 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" -def test_streaming_generate_content(sentry_init, capture_events, mock_genai_client): +def test_streaming_generate_content(sentry_init, capture_items, mock_genai_client): """Test streaming with generate_content_stream, verifying chunk accumulation.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Create streaming chunks - simulating a multi-chunk response # Chunk 1: First part of text with partial usage metadata @@ -497,40 +494,41 @@ def test_streaming_generate_content(sentry_init, capture_events, mock_genai_clie assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" 
- (event,) = events - - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check that streaming flag is set on both spans - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" # Response text is stored as a JSON string - chat_response_text = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]) + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) # When there's a single finish reason, it's stored as a plain string (not JSON) - assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert 
chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 # Verify model name - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" -def test_span_origin(sentry_init, capture_events, mock_genai_client): +def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -543,22 +541,21 @@ def test_span_origin(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Test origin", config=config ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" -def test_response_without_usage_metadata( - sentry_init, capture_events, mock_genai_client -): + +def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_client): """Test handling of responses without usage metadata""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without usage metadata response_json = { @@ -584,23 +581,22 @@ def test_response_without_usage_metadata( model="gemini-1.5-flash", contents="Test", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Usage data should not be present - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["data"] - assert 
SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["data"] - assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] -def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): +def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): """Test handling of multiple response candidates""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Response with multiple candidates multi_candidate_json = { @@ -638,12 +634,11 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Generate multiple", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Should capture all responses # Response text is stored as a JSON string when there are multiple responses - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] if isinstance(response_text, str) and response_text.startswith("["): # It's a JSON array response_list = json.loads(response_text) @@ -654,18 +649,18 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): # Finish reasons are serialized as JSON finish_reasons = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] ) assert finish_reasons == ["STOP", "MAX_TOKENS"] -def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_client): +def test_all_configuration_parameters(sentry_init, capture_items, 
mock_genai_client): """Test that all configuration parameters are properly recorded""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -686,26 +681,25 @@ def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_cl model="gemini-1.5-flash", contents="Test all params", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check all parameters are recorded - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 -def test_empty_response(sentry_init, capture_events, mock_genai_client): +def test_empty_response(sentry_init, capture_items, mock_genai_client): """Test handling of minimal response with no content""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = 
capture_events() + items = capture_items("span") # Minimal response with empty candidates array minimal_response_json = {"candidates": []} @@ -723,20 +717,20 @@ def test_empty_response(sentry_init, capture_events, mock_genai_client): assert response is not None assert len(response.candidates) == 0 - (event,) = events # Should still create spans even with empty candidates - assert len(event["spans"]) == 1 + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 def test_response_with_different_id_fields( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test handling of different response ID field names""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response with response_id and model_version response_json = { @@ -763,20 +757,21 @@ def test_response_with_different_id_fields( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gemini-1.5-flash-001" + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] + == "gemini-1.5-flash-001" + ) -def test_tool_with_async_function(sentry_init, capture_events): +def test_tool_with_async_function(sentry_init): """Test that async tool functions are properly wrapped""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - capture_events() # Create an async tool function async def async_tool(param: str) -> str: @@ -792,14 +787,14 @@ async def async_tool(param: str) -> str: assert hasattr(wrapped_async_tool, "__wrapped__") # Should preserve original -def 
test_contents_as_none(sentry_init, capture_events, mock_genai_client): +def test_contents_as_none(sentry_init, capture_items, mock_genai_client): """Test handling when contents parameter is None""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -811,22 +806,21 @@ def test_contents_as_none(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents=None, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Should handle None contents gracefully - messages = invoke_span["data"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) + messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) # Should only have system message if any, not user message assert all(msg["role"] != "user" or msg["content"] is not None for msg in messages) -def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): +def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): """Test extraction of tool/function calls from response""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response with function calls function_call_response_json = { @@ -875,14 +869,17 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): config=create_test_config(), ) - (event,) = events - chat_span = event["spans"][0] # The chat span + chat_span = next( + item.payload for item in items if item.type == "span" + ) # The chat span # Check that tool calls are extracted and stored - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] 
# Parse the JSON string to verify content - tool_calls = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]) + tool_calls = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + ) assert len(tool_calls) == 2 @@ -902,16 +899,14 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation( - sentry_init, capture_events, mock_genai_client -): +def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_client): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -930,11 +925,10 @@ def test_google_genai_message_truncation( config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -980,14 +974,14 @@ def test_google_genai_message_truncation( ], ) def test_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = 
capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1006,47 +1000,49 @@ def test_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans # Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available 
in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 -def test_embed_content_string_input(sentry_init, capture_events, mock_genai_client): +def test_embed_content_string_input(sentry_init, capture_items, mock_genai_client): """Test embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1074,25 +1070,25 @@ def test_embed_content_string_input(sentry_init, capture_events, mock_genai_clie contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 -def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_client): +def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_client): """Test error handling in embed_content.""" 
sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1108,8 +1104,8 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1118,14 +1114,14 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl def test_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1150,21 +1146,21 @@ def test_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] -def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_client): +def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client): """Test that 
embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1177,11 +1173,12 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien contents=["Test origin"], ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" @pytest.mark.asyncio @@ -1195,7 +1192,7 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien ], ) async def test_async_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): """Test async embed_content method.""" sentry_init( @@ -1203,7 +1200,7 @@ async def test_async_embed_content( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the async HTTP response mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1222,42 +1219,44 @@ async def test_async_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings_async" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans 
# Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 @pytest.mark.asyncio async def test_async_embed_content_string_input( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content with a single string instead of list.""" sentry_init( @@ -1265,7 +1264,7 @@ async def test_async_embed_content_string_input( 
traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1293,28 +1292,28 @@ async def test_async_embed_content_string_input( contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @pytest.mark.asyncio async def test_async_embed_content_error_handling( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test error handling in async embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1330,8 +1329,8 @@ async def test_async_embed_content_error_handling( ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1341,14 +1340,14 
@@ async def test_async_embed_content_error_handling( @pytest.mark.asyncio async def test_async_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1373,24 +1372,24 @@ async def test_async_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] @pytest.mark.asyncio async def test_async_embed_content_span_origin( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test that async embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1403,16 +1402,17 @@ async def test_async_embed_content_span_origin( contents=["Test origin"], ) - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == 
"span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" # Integration tests for generate_content with different input message formats def test_generate_content_with_content_object( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Content object input.""" sentry_init( @@ -1420,7 +1420,7 @@ def test_generate_content_with_content_object( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1437,10 +1437,9 @@ def test_generate_content_with_content_object( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1449,7 +1448,7 @@ def test_generate_content_with_content_object( def test_generate_content_with_dict_format( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format input (ContentDict).""" sentry_init( @@ -1457,7 +1456,7 @@ def test_generate_content_with_dict_format( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1472,10 +1471,9 @@ def test_generate_content_with_dict_format( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - 
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1483,16 +1481,14 @@ def test_generate_content_with_dict_format( ] -def test_generate_content_with_file_data( - sentry_init, capture_events, mock_genai_client -): +def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): """Test generate_content with file_data (external file reference).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1516,10 +1512,9 @@ def test_generate_content_with_file_data( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1534,7 +1529,7 @@ def test_generate_content_with_file_data( def test_generate_content_with_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with inline_data (binary data).""" sentry_init( @@ -1542,7 +1537,7 @@ def test_generate_content_with_inline_data( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1565,10 +1560,9 @@ def test_generate_content_with_inline_data( 
model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1580,7 +1574,7 @@ def test_generate_content_with_inline_data( def test_generate_content_with_function_response( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( @@ -1588,7 +1582,7 @@ def test_generate_content_with_function_response( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1622,10 +1616,9 @@ def test_generate_content_with_function_response( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -1635,7 +1628,7 @@ def test_generate_content_with_function_response( def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( @@ -1643,7 +1636,7 @@ def test_generate_content_with_mixed_string_and_content( traces_sample_rate=1.0, 
send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1668,10 +1661,9 @@ def test_generate_content_with_mixed_string_and_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -1679,7 +1671,7 @@ def test_generate_content_with_mixed_string_and_content( def test_generate_content_with_part_object_directly( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Part object directly (not wrapped in Content).""" sentry_init( @@ -1687,7 +1679,7 @@ def test_generate_content_with_part_object_directly( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1702,17 +1694,16 @@ def test_generate_content_with_part_object_directly( model="gemini-1.5-flash", contents=part, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] def test_generate_content_with_list_of_dicts( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client 
): """ Test generate_content with list of dict format inputs. @@ -1726,7 +1717,7 @@ def test_generate_content_with_list_of_dicts( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1745,17 +1736,16 @@ def test_generate_content_with_list_of_dicts( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] def test_generate_content_with_dict_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format containing inline_data.""" sentry_init( @@ -1763,7 +1753,7 @@ def test_generate_content_with_dict_inline_data( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1784,10 +1774,9 @@ def test_generate_content_with_dict_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1801,14 +1790,14 @@ def test_generate_content_with_dict_inline_data( 
def test_generate_content_without_parts_property_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1825,10 +1814,9 @@ def test_generate_content_without_parts_property_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 @@ -1845,14 +1833,14 @@ def test_generate_content_without_parts_property_inline_data( def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1874,10 +1862,9 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" @@ -2162,7 +2149,9 @@ def 
test_extract_contents_messages_dict_inline_data(): """Test extract_contents_messages with dict containing inline_data""" content_dict = { "role": "user", - "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], + "parts": [ + {"inline_data": {"attributes": b"binary_data", "mime_type": "image/gif"}} + ], } result = extract_contents_messages(content_dict) From ff9c5ec2f2eac0a7fa94b49b40cdd31e172c053f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 09:52:20 +0200 Subject: [PATCH 13/84] test litellm --- tests/integrations/litellm/test_litellm.py | 477 +++++++++++---------- 1 file changed, 241 insertions(+), 236 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index a8df5891ce..90807744e7 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -142,7 +142,7 @@ def __init__( def test_nonstreaming_chat_completion( reset_litellm_executor, sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -153,7 +153,7 @@ def test_nonstreaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -179,37 +179,36 @@ def test_nonstreaming_chat_completion( litellm_utils.executor.shutdown(wait=True) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert 
len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 @pytest.mark.asyncio(loop_scope="session") @@ -224,7 +223,7 @@ def test_nonstreaming_chat_completion( ) async def test_async_nonstreaming_chat_completion( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ 
-235,7 +234,7 @@ async def test_async_nonstreaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -262,37 +261,36 @@ async def test_async_nonstreaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: 
- assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 @pytest.mark.parametrize( @@ -307,7 +305,7 @@ async def test_async_nonstreaming_chat_completion( def test_streaming_chat_completion( reset_litellm_executor, sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -319,7 +317,7 @@ def test_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") messages = [{"role": "user", "content": "Hello!"}] @@ -350,20 +348,18 @@ def test_streaming_chat_completion( streaming_handler.executor.shutdown(wait=True) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio(loop_scope="session") @@ -378,7 +374,7 @@ 
def test_streaming_chat_completion( ) async def test_async_streaming_chat_completion( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -391,7 +387,7 @@ async def test_async_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -425,25 +421,23 @@ async def test_async_streaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["op"] == OP.GEN_AI_CHAT - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True def test_embeddings_create( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -459,7 +453,7 @@ def test_embeddings_create( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -485,32 +479,34 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and 
x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["description"] == "embeddings text-embedding-ada-002" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == ["Hello, world!"] @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -526,7 +522,7 @@ async def test_async_embeddings_create( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -553,31 +549,33 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == 
"auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["description"] == "embeddings text-embedding-ada-002" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == ["Hello, world!"] def test_embeddings_create_with_list_input( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -588,7 +586,7 @@ def test_embeddings_create_with_list_input( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -614,22 +612,21 @@ def test_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == [ "First text", "Second text", @@ -640,7 +637,7 @@ def test_embeddings_create_with_list_input( @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create_with_list_input( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -651,7 +648,7 @@ async def test_async_embeddings_create_with_list_input( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -678,22 +675,21 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"]["sentry.op"] == 
OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] assert json.loads(embeddings_input) == [ "First text", "Second text", @@ -703,7 +699,7 @@ async def test_async_embeddings_create_with_list_input( def test_embeddings_no_pii( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -714,7 +710,7 @@ def test_embeddings_no_pii( traces_sample_rate=1.0, send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -740,27 +736,26 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_no_pii( sentry_init, - capture_events, + capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, @@ -771,7 +766,7 @@ async def test_async_embeddings_no_pii( 
traces_sample_rate=1.0, send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -798,31 +793,30 @@ async def test_async_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(spans) == 1 span = spans[0] - assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] def test_exception_handling( - reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response + reset_litellm_executor, sentry_init, capture_items, get_rate_limit_model_response ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -843,22 +837,24 @@ def test_exception_handling( client=client, ) - # Should have error event and transaction - assert len(events) >= 1 # Find the error event - error_events = [e for e in events if e.get("level") == "error"] + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] assert len(error_events) == 1 @pytest.mark.asyncio(loop_scope="session") async def test_async_exception_handling( - sentry_init, capture_events, get_rate_limit_model_response + 
sentry_init, capture_items, get_rate_limit_model_response ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -879,17 +875,19 @@ async def test_async_exception_handling( client=client, ) - # Should have error event and transaction - assert len(events) >= 1 # Find the error event - error_events = [e for e in events if e.get("level") == "error"] + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] assert len(error_events) == 1 def test_span_origin( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -897,7 +895,7 @@ def test_span_origin( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -923,16 +921,17 @@ def test_span_origin( litellm_utils.executor.shutdown(wait=True) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.litellm" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.litellm" def test_multiple_providers( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, @@ -943,7 +942,7 @@ def test_multiple_providers( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction") messages = [{"role": "user", "content": "Hello!"}] @@ -1015,18 +1014,19 @@ def test_multiple_providers( 
litellm_utils.executor.shutdown(wait=True) + events = [item.payload for item in items if item.type == "transaction"] assert len(events) == 3 - for i in range(3): - span = events[i]["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + for span in spans: # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["data"] + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] @pytest.mark.asyncio(loop_scope="session") async def test_async_multiple_providers( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, @@ -1037,7 +1037,7 @@ async def test_async_multiple_providers( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1112,18 +1112,19 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) + events = [item.payload for item in items if item.type == "transaction"] assert len(events) == 3 - for i in range(3): - span = events[i]["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + for span in spans: # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["data"] + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] def test_additional_parameters( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1132,7 +1133,7 @@ def test_additional_parameters( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1162,26 +1163,27 @@ def test_additional_parameters( 
litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 @pytest.mark.asyncio(loop_scope="session") async def test_async_additional_parameters( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1190,7 +1192,7 @@ async def test_async_additional_parameters( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1221,26 +1223,27 @@ async def test_async_additional_parameters( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 def test_no_integration( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1248,7 +1251,7 @@ def test_no_integration( sentry_init( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1273,13 +1276,12 @@ def test_no_integration( litellm_utils.executor.shutdown(wait=True) - (event,) = events - # Should still have the transaction, but no child spans since integration is off - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 0 @@ -1287,7 +1289,7 @@ def test_no_integration( @pytest.mark.asyncio(loop_scope="session") async def test_async_no_integration( sentry_init, - 
capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1295,7 +1297,7 @@ async def test_async_no_integration( sentry_init( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1321,24 +1323,23 @@ async def test_async_no_integration( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events - # Should still have the transaction, but no child spans since integration is off - assert event["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 0 -def test_response_without_usage(sentry_init, capture_events): +def test_response_without_usage(sentry_init, capture_items): """Test handling of responses without usage information.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1366,12 +1367,11 @@ def test_response_without_usage(sentry_init, capture_events): datetime.now(), ) - (event,) = events - (span,) = event["spans"] + (span,) = (item.payload for item in items if item.type == "span") # Span should still be created even without usage info - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat gpt-3.5-turbo" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" def test_integration_setup(sentry_init): @@ -1387,14 +1387,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) 
-def test_litellm_message_truncation(sentry_init, capture_events): +def test_litellm_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -1422,25 +1422,24 @@ def test_litellm_message_truncation(sentry_init, capture_events): datetime.now(), ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1452,7 +1451,7 @@ def test_litellm_message_truncation(sentry_init, capture_events): def test_binary_content_encoding_image_url( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1461,7 +1460,7 @@ def test_binary_content_encoding_image_url( traces_sample_rate=1.0, 
send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1498,15 +1497,16 @@ def test_binary_content_encoding_image_url( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1530,7 +1530,7 @@ def test_binary_content_encoding_image_url( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_image_url( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1539,7 +1539,7 @@ async def test_async_binary_content_encoding_image_url( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1577,15 +1577,16 @@ async def test_async_binary_content_encoding_image_url( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1609,7 
+1610,7 @@ async def test_async_binary_content_encoding_image_url( def test_binary_content_encoding_mixed_content( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1618,7 +1619,7 @@ def test_binary_content_encoding_mixed_content( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1656,15 +1657,16 @@ def test_binary_content_encoding_mixed_content( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1676,7 +1678,7 @@ def test_binary_content_encoding_mixed_content( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_mixed_content( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1685,7 +1687,7 @@ async def test_async_binary_content_encoding_mixed_content( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1724,15 +1726,16 @@ async def test_async_binary_content_encoding_mixed_content( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in 
event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1744,7 +1747,7 @@ async def test_async_binary_content_encoding_mixed_content( def test_binary_content_encoding_uri_type( reset_litellm_executor, sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1753,7 +1756,7 @@ def test_binary_content_encoding_uri_type( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1789,15 +1792,16 @@ def test_binary_content_encoding_uri_type( litellm_utils.executor.shutdown(wait=True) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( @@ -1816,7 +1820,7 @@ def test_binary_content_encoding_uri_type( @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_uri_type( sentry_init, - capture_events, + capture_items, get_model_response, nonstreaming_chat_completions_model_response, ): @@ -1825,7 +1829,7 @@ async def 
test_async_binary_content_encoding_uri_type( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1862,15 +1866,16 @@ async def test_async_binary_content_encoding_uri_type( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - (event,) = events + spans = [item.payload for item in items if item.type == "span"] chat_spans = list( x - for x in event["spans"] - if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( From b92ae36dcfa27debc12b7c5bcaa7793434fec187 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:12:04 +0200 Subject: [PATCH 14/84] test huggingface_hub --- .../huggingface_hub/test_huggingface_hub.py | 231 +++++++++++------- 1 file changed, 139 insertions(+), 92 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 9dd15ca4b5..6b4402bc52 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -471,7 +471,7 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_text_generation_api: "Any", @@ -481,7 +481,7 @@ def test_text_generation( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + 
items = capture_items("transaction", "span") client = InferenceClient(model="test-model") @@ -492,23 +492,22 @@ def test_text_generation( details=True, ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.text_completion" - assert span["description"] == "text_completion test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "text_completion", @@ -516,6 +515,14 @@ def test_text_generation( "gen_ai.response.finish_reasons": "length", "gen_ai.response.streaming": False, "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -528,10 +535,10 @@ def test_text_generation( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data # text generation does not set the response model - assert "gen_ai.response.model" not in span["data"] + assert 
"gen_ai.response.model" not in span["attributes"] @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -539,7 +546,7 @@ def test_text_generation( @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_text_generation_api_streaming: "Any", @@ -549,7 +556,7 @@ def test_text_generation_streaming( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = InferenceClient(model="test-model") @@ -561,23 +568,22 @@ def test_text_generation_streaming( ): pass - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.text_completion" - assert span["description"] == "text_completion test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "text_completion", @@ -585,6 +591,14 @@ def test_text_generation_streaming( "gen_ai.response.finish_reasons": "length", "gen_ai.response.streaming": True, "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": 
"gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -597,10 +611,10 @@ def test_text_generation_streaming( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data # text generation does not set the response model - assert "gen_ai.response.model" not in span["data"] + assert "gen_ai.response.model" not in span["attributes"] @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -608,7 +622,7 @@ def test_text_generation_streaming( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api: "Any", @@ -618,7 +632,7 @@ def test_chat_completion( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -628,23 +642,22 @@ def test_chat_completion( stream=False, ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - 
assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -655,6 +668,14 @@ def test_chat_completion( "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -671,7 +692,7 @@ def test_chat_completion( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -679,7 +700,7 @@ def test_chat_completion( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_streaming: "Any", @@ -689,7 +710,7 @@ def test_chat_completion_streaming( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -701,23 +722,22 @@ def test_chat_completion_streaming( ) ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if 
sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -725,6 +745,14 @@ def test_chat_completion_streaming( "gen_ai.response.finish_reasons": "stop", "gen_ai.response.model": "test-model-123", "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -744,15 +772,15 @@ def test_chat_completion_streaming( assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init: "Any", capture_events: "Any", mock_hf_api_with_errors: "Any" + sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -762,32 +790,29 @@ def test_chat_completion_api_error( messages=[{"role": "user", 
"content": "Hello!"}], ) - ( - error, - transaction, - ) = events - + (error,) = (item.payload for item in items if item.type == "event") assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" assert not error["exception"]["values"][0]["mechanism"]["handled"] + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" - assert span["status"] == "internal_error" - assert span.get("tags", {}).get("status") == "internal_error" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "error" + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( error["contexts"]["trace"]["trace_id"] == transaction["contexts"]["trace"]["trace_id"] @@ -795,18 +820,26 @@ def test_chat_completion_api_error( expected_data = { "gen_ai.operation.name": "chat", "gen_ai.request.model": "test-model", + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert span["data"] == expected_data + assert 
span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init: "Any", capture_events: "Any", mock_hf_api_with_errors: "Any" + sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -816,22 +849,22 @@ def test_span_status_error( messages=[{"role": "user", "content": "Hello!"}], ) - (error, transaction) = events + (error,) = [item.payload for item in items if item.type == "event"] assert error["level"] == "error" + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -839,7 +872,7 @@ def test_span_status_error( @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_tools: "Any", @@ -849,7 +882,7 @@ def test_chat_completion_with_tools( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = 
get_hf_provider_inference_client() @@ -875,23 +908,22 @@ def test_chat_completion_with_tools( tool_choice="auto", ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -902,6 +934,14 @@ def test_chat_completion_with_tools( "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -919,7 +959,7 @@ def test_chat_completion_with_tools( assert "gen_ai.response.text" not in expected_data assert "gen_ai.response.tool_calls" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @@ -927,7 +967,7 @@ def test_chat_completion_with_tools( @pytest.mark.parametrize("include_prompts", [True, False]) def 
test_chat_completion_streaming_with_tools( sentry_init: "Any", - capture_events: "Any", + capture_items: "Any", send_default_pii: "Any", include_prompts: "Any", mock_hf_chat_completion_api_streaming_tools: "Any", @@ -937,7 +977,7 @@ def test_chat_completion_streaming_with_tools( send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - events = capture_events() + items = capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -966,23 +1006,22 @@ def test_chat_completion_streaming_with_tools( ) ) - (transaction,) = events - + spans = [item.payload for item in items if item.type == "span"] span = None - for sp in transaction["spans"]: - if sp["op"].startswith("gen_ai"): + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): assert span is None, "there is exactly one gen_ai span" span = sp else: # there should be no other spans, just the gen_ai span # and optionally some http.client spans from talking to the hf api - assert sp["op"] == "http.client" + assert sp["attributes"]["sentry.op"] == "http.client" assert span is not None - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -991,6 +1030,14 @@ def test_chat_completion_streaming_with_tools( "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": "2.58.0", + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", 
"thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1014,4 +1061,4 @@ def test_chat_completion_streaming_with_tools( assert "gen_ai.response.text" not in expected_data assert "gen_ai.response.tool_calls" not in expected_data - assert span["data"] == expected_data + assert span["attributes"] == expected_data From 907ca1d981ac652ce8e31015f5addd4af04316c1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:31:41 +0200 Subject: [PATCH 15/84] test langchain --- .../integrations/langchain/test_langchain.py | 590 ++++++++++-------- 1 file changed, 319 insertions(+), 271 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 498a5d6f4a..f709d12129 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -97,7 +97,7 @@ def _llm_type(self) -> str: def test_langchain_text_completion( sentry_init, - capture_events, + capture_items, get_model_response, ): sentry_init( @@ -109,7 +109,7 @@ def test_langchain_text_completion( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") model_response = get_model_response( Completion( @@ -149,25 +149,29 @@ def test_langchain_text_completion( input_text = "What is the capital of France?" 
model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["description"] == "text_completion gpt-3.5-turbo" - assert llm_span["data"]["gen_ai.system"] == "openai" - assert llm_span["data"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" - assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" - assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris." - assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 - assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 + assert llm_span["name"] == "text_completion gpt-3.5-turbo" + assert llm_span["attributes"]["gen_ai.system"] == "openai" + assert llm_span["attributes"]["gen_ai.pipeline.name"] == "my-snazzy-pipeline" + assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["attributes"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 @pytest.mark.skipif( @@ -196,7 +200,7 @@ def test_langchain_text_completion( ) def test_langchain_create_agent( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, system_instructions_content, @@ -213,7 +217,7 @@ def test_langchain_create_agent( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") model_response = get_model_response( nonstreaming_responses_model_response, @@ -250,22 +254,23 @@ def test_langchain_create_agent( }, ) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") assert len(chat_spans) == 1 - assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 if send_default_pii and include_prompts: assert ( - chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + 
chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hello, how can I help you?" ) @@ -276,7 +281,9 @@ def test_langchain_create_agent( "type": "text", "content": "You are very powerful assistant, but don't know current events", } - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: assert [ { @@ -287,11 +294,17 @@ def test_langchain_create_agent( "type": "text", "content": "Be concise and clear.", }, - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) @pytest.mark.skipif( @@ -309,7 +322,7 @@ def test_langchain_create_agent( ) def test_tool_execution_span( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -324,7 +337,7 @@ def test_tool_execution_span( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") responses = responses_tool_call_model_responses( tool_name="get_word_length", @@ -400,60 +413,71 @@ def test_tool_execution_span( }, ) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + 
spans = [item.payload for item in items if item.type == "span"] + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") assert len(chat_spans) == 2 - tool_exec_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + tool_exec_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) assert len(tool_exec_spans) == 1 tool_exec_span = tool_exec_spans[0] - assert chat_spans[0]["origin"] == "auto.ai.langchain" - assert chat_spans[1]["origin"] == "auto.ai.langchain" - assert tool_exec_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 - assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 - assert chat_spans[1]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" if send_default_pii and include_prompts: - assert 
"word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) - tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] assert isinstance(tool_calls_data, str) assert "get_word_length" in tool_calls_data else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) # Verify tool calls are NOT recorded when PII is disabled assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "data", {} + "attributes", {} ), ( f"Tool calls 
should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" @@ -461,7 +485,7 @@ def test_tool_execution_span( # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: - tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert "get_word_length" in tools_data @@ -488,7 +512,7 @@ def test_tool_execution_span( ) def test_langchain_openai_tools_agent( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, system_instructions_content, @@ -505,7 +529,7 @@ def test_langchain_openai_tools_agent( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -700,40 +724,47 @@ def test_langchain_openai_tools_agent( with start_transaction(): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" - invoke_agent_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent") - chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") - tool_exec_span = next(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") + tool_exec_span = next( 
+ x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) assert len(chat_spans) == 2 - assert invoke_agent_span["origin"] == "auto.ai.langchain" - assert chat_spans[0]["origin"] == "auto.ai.langchain" - assert chat_spans[1]["origin"] == "auto.ai.langchain" - assert tool_exec_span["origin"] == "auto.ai.langchain" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" # We can't guarantee anything about the "shape" of the langchain execution graph - assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 + ) # Token usage is only available in newer versions of langchain (v0.2+) # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: - assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: - assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert 
chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 if send_default_pii and include_prompts: - assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) param_id = request.node.callspec.id if "string" in param_id: @@ -742,7 +773,9 @@ def test_langchain_openai_tools_agent( "type": "text", "content": "You are very powerful assistant, but don't know current events", } - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: assert [ { @@ -753,15 +786,21 @@ def test_langchain_openai_tools_agent( "type": "text", "content": "Be concise and clear.", }, - ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) - assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), ( + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( "Tool calls should be recorded when send_default_pii=True and include_prompts=True" ) - tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] assert isinstance(tool_calls_data, (list, str)) # Could be serialized if 
isinstance(tool_calls_data, str): assert "get_word_length" in tool_calls_data @@ -770,45 +809,55 @@ def test_langchain_openai_tools_agent( tool_call_str = str(tool_calls_data) assert "get_word_length" in tool_call_str else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) # Verify tool calls are NOT recorded when PII is disabled assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "data", {} + "attributes", {} ), ( f"Tool calls should NOT be recorded when 
send_default_pii={send_default_pii} " f"and include_prompts={include_prompts}" ) # Verify finish_reasons is always an array of strings - assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ "function_call" ] - assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] # Verify that available tools are always recorded regardless of PII settings for chat_span in chat_spans: - tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data is not None, ( "Available tools should always be recorded regardless of PII settings" ) assert "get_word_length" in tools_data -def test_langchain_error(sentry_init, capture_events): +def test_langchain_error(sentry_init, capture_items): global llm_type llm_type = "acme-llm" @@ -817,7 +866,7 @@ def test_langchain_error(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -843,11 +892,11 @@ def test_langchain_error(sentry_init, capture_events): with start_transaction(), pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = events[0] + error = next(item.payload for item in items if item.type == "event") assert error["level"] == "error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): global llm_type llm_type = "acme-llm" @@ -855,7 +904,7 @@ def test_span_status_error(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, ) - events = capture_events() + items = 
capture_items("event", "transaction", "span") with start_transaction(name="test"): prompt = ChatPromptTemplate.from_messages( @@ -884,10 +933,13 @@ def test_span_status_error(sentry_init, capture_events): with pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - (error, transaction) = events + error = next(item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @@ -1100,7 +1152,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_langchain_message_role_mapping(sentry_init, capture_events): +def test_langchain_message_role_mapping(sentry_init, capture_items): """Test that message roles are properly normalized in langchain integration.""" global llm_type llm_type = "openai-chat" @@ -1110,7 +1162,7 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1146,19 +1198,18 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): with start_transaction(): list(agent_executor.stream({"input": test_input})) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find spans with gen_ai operation that should have message data gen_ai_spans = [ - span for span in tx.get("spans", []) if span.get("op", "").startswith("gen_ai") + span + for span in spans + 
if span["attributes"].get("sentry.op", "").startswith("gen_ai") ] # Check if any span has message data with normalized roles message_data_found = False for span in gen_ai_spans: - span_data = span.get("data", {}) + span_data = span.get("attributes", {}) if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: message_data_found = True messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -1239,7 +1290,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -def test_langchain_message_truncation(sentry_init, capture_events): +def test_langchain_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -1248,7 +1299,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -1291,23 +1342,23 @@ def test_langchain_message_truncation(sentry_init, capture_events): ) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == 
"my_pipeline" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] + messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -1327,7 +1378,7 @@ def test_langchain_message_truncation(sentry_init, capture_events): ], ) def test_langchain_embeddings_sync( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that sync embedding methods (embed_documents, embed_query) are properly traced.""" try: @@ -1340,7 +1391,7 @@ def test_langchain_embeddings_sync( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1362,27 +1413,28 @@ def test_langchain_embeddings_sync( assert len(result) == 2 mock_embed_documents.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings text-embedding-ada-002" - assert embeddings_span["origin"] == "auto.ai.langchain" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == 
"auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Hello world" in input_data @@ -1391,7 +1443,9 @@ def test_langchain_embeddings_sync( assert "Hello world" in input_data assert "Test document" in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -1402,7 +1456,7 @@ def test_langchain_embeddings_sync( ], ) def test_langchain_embeddings_embed_query( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that embed_query method is properly traced.""" try: @@ -1415,7 +1469,7 @@ def test_langchain_embeddings_embed_query( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1436,32 +1490,35 @@ def test_langchain_embeddings_embed_query( assert len(result) == 3 mock_embed_query.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == 
"gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "What is the capital of France?" in input_data else: assert "What is the capital of France?" 
in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -1473,7 +1530,7 @@ def test_langchain_embeddings_embed_query( ) @pytest.mark.asyncio async def test_langchain_embeddings_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that async embedding methods (aembed_documents, aembed_query) are properly traced.""" try: @@ -1486,7 +1543,7 @@ async def test_langchain_embeddings_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") async def mock_aembed_documents(self, texts): return [[0.1, 0.2, 0.3] for _ in texts] @@ -1512,38 +1569,41 @@ async def mock_aembed_documents(self, texts): assert len(result) == 2 mock_aembed.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings text-embedding-ada-002" - assert embeddings_span["origin"] == "auto.ai.langchain" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + 
embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured based on PII settings if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async hello" in input_data or "Async test document" in input_data else: assert "Async hello" in input_data or "Async test document" in input_data else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) @pytest.mark.asyncio -async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): +async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): """Test that aembed_query method is properly traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -1555,7 +1615,7 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") async def mock_aembed_query(self, text): return [0.1, 0.2, 0.3] @@ -1579,24 +1639,25 @@ async def mock_aembed_query(self, text): assert len(result) == 3 mock_aembed.assert_called_once() - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 
embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" - assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) # Check if input is captured - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] - input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async query test" in input_data @@ -1604,7 +1665,7 @@ async def mock_aembed_query(self, text): assert "Async query test" in input_data -def test_langchain_embeddings_no_model_name(sentry_init, capture_events): +def test_langchain_embeddings_no_model_name(sentry_init, capture_items): """Test embeddings when model name is not available.""" try: from langchain_openai import OpenAIEmbeddings @@ -1615,7 +1676,7 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_events): integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call and remove model attribute with mock.patch.object( @@ -1635,28 +1696,26 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_events): with start_transaction(name="test_embeddings_no_model"): embeddings.embed_documents(["Test"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings span embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + 
for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["description"] == "embeddings" - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" # Model name should not be set if not available assert ( - "gen_ai.request.model" not in embeddings_span["data"] - or embeddings_span["data"]["gen_ai.request.model"] is None + "gen_ai.request.model" not in embeddings_span["attributes"] + or embeddings_span["attributes"]["gen_ai.request.model"] is None ) -def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): +def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): """Test that embeddings are not traced when integration is disabled.""" try: from langchain_openai import OpenAIEmbeddings @@ -1665,7 +1724,7 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): # Initialize without LangchainIntegration sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( OpenAIEmbeddings, @@ -1680,18 +1739,17 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): embeddings.embed_documents(["Test"]) # Check that no embeddings spans were created - if events: - tx = events[0] - embeddings_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.embeddings" - ] - # Should be empty since integration is disabled - assert len(embeddings_spans) == 0 + spans = [item.payload for item in items if item.type == "span"] + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 -def 
test_langchain_embeddings_multiple_providers(sentry_init, capture_events): +def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): """Test that embeddings work with different providers.""" try: from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings @@ -1703,7 +1761,7 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock both providers with mock.patch.object( @@ -1731,26 +1789,24 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): openai_embeddings.embed_documents(["OpenAI test"]) azure_embeddings.embed_documents(["Azure test"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 # Verify both spans have proper data for span in embeddings_spans: - assert span["data"]["gen_ai.operation.name"] == "embeddings" - assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] -def test_langchain_embeddings_error_handling(sentry_init, capture_events): +def test_langchain_embeddings_error_handling(sentry_init, capture_items): """Test that errors in embeddings are properly captured.""" try: from langchain_openai import OpenAIEmbeddings @@ -1762,7 +1818,7 @@ def 
test_langchain_embeddings_error_handling(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the API call to raise an error with mock.patch.object( @@ -1781,15 +1837,16 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_events): with pytest.raises(ValueError): embeddings.embed_documents(["Test"]) - # The error should be captured - assert len(events) >= 1 - # We should have both the transaction and potentially an error event - [e for e in events if e.get("level") == "error"] + [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] # Note: errors might not be auto-captured depending on SDK settings, # but the span should still be created -def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): +def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): """Test that multiple embeddings calls within a transaction are all traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -1801,7 +1858,7 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API calls with mock.patch.object( @@ -1828,32 +1885,31 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): # Call embed_documents again embeddings.embed_documents(["Third batch"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] 
assert len(embeddings_spans) == 3 # Verify all spans have proper data for span in embeddings_spans: - assert span["data"]["gen_ai.operation.name"] == "embeddings" - assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] # Verify the input data is different for each span input_data_list = [ - span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] for span in embeddings_spans + span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + for span in embeddings_spans ] # They should all be different (different inputs) assert len(set(str(data) for data in input_data_list)) == 3 -def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): +def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): """Test that embeddings spans are properly nested within parent spans.""" try: from langchain_openai import OpenAIEmbeddings @@ -1865,7 +1921,7 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API call with mock.patch.object( @@ -1884,15 +1940,15 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): with sentry_sdk.start_span(op="custom", name="custom operation"): embeddings.embed_documents(["Test within custom span"]) - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find all spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == 
"gen_ai.embeddings" ] + + tx = next(item.payload for item in items if item.type == "transaction") custom_spans = [span for span in tx.get("spans", []) if span.get("op") == "custom"] assert len(embeddings_spans) == 1 @@ -1902,11 +1958,11 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): embeddings_span = embeddings_spans[0] custom_span = custom_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" assert custom_span["description"] == "custom operation" -def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_events): +def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_items): """Test that embeddings correctly handle both list and string inputs.""" try: from langchain_openai import OpenAIEmbeddings @@ -1918,7 +1974,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the actual API calls with mock.patch.object( @@ -1943,21 +1999,19 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e # embed_query takes a string embeddings.embed_query("Single string query") - # Check captured events - assert len(events) >= 1 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] # Find embeddings spans embeddings_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] assert len(embeddings_spans) == 2 # Both should have input data captured as lists for span in embeddings_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] - input_data = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert 
SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Input should be normalized to list format if isinstance(input_data, str): # If serialized, should contain the input text @@ -1975,7 +2029,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e ) def test_langchain_response_model_extraction( sentry_init, - capture_events, + capture_items, response_metadata_model, expected_model, ): @@ -1984,7 +2038,7 @@ def test_langchain_response_model_extraction( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2009,25 +2063,22 @@ def test_langchain_response_model_extraction( response = Mock(generations=[[generation]]) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: - assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["data"] - assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] + assert llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model else: - assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) # Tests for multimodal content transformation functions @@ 
-2286,13 +2337,13 @@ def test_transform_google_file_data(self): ], ) def test_langchain_ai_system_detection( - sentry_init, capture_events, ai_type, expected_system + sentry_init, capture_items, ai_type, expected_system ): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2312,23 +2363,20 @@ def test_langchain_ai_system_detection( response = Mock(generations=[[generation]]) callback.on_llm_end(response=response, run_id=run_id) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] llm_spans = [ span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] assert len(llm_spans) > 0 llm_span = llm_spans[0] if expected_system is not None: - assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system + assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system else: - assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) class TestTransformLangchainMessageContent: From b2542976f0f43bd1160f07f2a6783919d9861588 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:35:14 +0200 Subject: [PATCH 16/84] test langgraph --- .../integrations/langgraph/test_langgraph.py | 386 ++++++++++-------- 1 file changed, 205 insertions(+), 181 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 2a385d8a78..e1a3baa0a8 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -147,7 +147,7 @@ def test_langgraph_integration_init(): ], ) def 
test_state_graph_compile( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test StateGraph.compile() wrapper creates proper create_agent span.""" sentry_init( @@ -155,7 +155,7 @@ def test_state_graph_compile( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") graph = MockStateGraph() def original_compile(self, *args, **kwargs): @@ -171,21 +171,23 @@ def original_compile(self, *args, **kwargs): assert compiled_graph is not None assert compiled_graph.name == "test_graph" - tx = events[0] - assert tx["type"] == "transaction" - - agent_spans = [span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT] + spans = [item.payload for item in items if item.type == "span"] + agent_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT + ] assert len(agent_spans) == 1 agent_span = agent_spans[0] - assert agent_span["description"] == "create_agent test_graph" - assert agent_span["origin"] == "auto.ai.langgraph" - assert agent_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" - assert agent_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - assert agent_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" - assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["data"] - - tools_data = agent_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert agent_span["name"] == "create_agent test_graph" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + + tools_data = 
agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] assert tools_data == ["search_tool", "calculator"] assert len(tools_data) == 2 assert "search_tool" in tools_data @@ -201,14 +203,14 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_prompts): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -245,26 +247,26 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" if send_default_pii and include_prompts: - assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + request_messages = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] if isinstance(request_messages, str): import json @@ -273,11 +275,11 @@ def original_invoke(self, *args, **kwargs): assert len(request_messages) == 1 assert request_messages[0]["content"] == "Of course! How can I assist you?" - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): import json @@ -287,9 +289,11 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[0]["id"] == "call_test_123" assert tool_calls_data[0]["function"]["name"] == "search_tool" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) @pytest.mark.parametrize( @@ -301,14 +305,14 @@ def original_invoke(self, *args, **kwargs): (False, False), ], ) -def 
test_pregel_ainvoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_prompts): """Test Pregel.ainvoke() async wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} pregel = MockPregelInstance("async_graph") @@ -341,30 +345,30 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent async_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" + assert invoke_span["name"] == "invoke_agent async_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == expected_assistant_response - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): import json @@ -374,19 +378,21 @@ async def run_test(): assert tool_calls_data[0]["id"] == "call_weather_456" assert tool_calls_data[0]["function"]["name"] == "get_weather" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) -def test_pregel_invoke_error(sentry_init, capture_events): +def test_pregel_invoke_error(sentry_init, capture_items): """Test error handling during graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} pregel = MockPregelInstance("error_graph") @@ -397,25 +403,26 @@ def original_invoke(self, *args, **kwargs): wrapped_invoke = _wrap_pregel_invoke(original_invoke) wrapped_invoke(pregel, test_state) - tx = events[0] + spans = [item.payload for 
item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "internal_error" - assert invoke_span.get("tags", {}).get("status") == "internal_error" + assert invoke_span.get("status") == "error" -def test_pregel_ainvoke_error(sentry_init, capture_events): +def test_pregel_ainvoke_error(sentry_init, capture_items): """Test error handling during async graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} pregel = MockPregelInstance("async_error_graph") @@ -431,24 +438,25 @@ async def run_error_test(): asyncio.run(run_error_test()) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "internal_error" - assert invoke_span.get("tags", {}).get("status") == "internal_error" + assert invoke_span.get("status") == "error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): """Test that span origins are correctly set.""" sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") graph = MockStateGraph() @@ -461,16 +469,17 @@ def original_compile(self, *args, **kwargs): wrapped_compile = _wrap_state_graph_compile(original_compile) wrapped_compile(graph) - tx = 
events[0] + tx = next(item.payload for item in items if item.type == "transaction") assert tx["contexts"]["trace"]["origin"] == "manual" - for span in tx["spans"]: - assert span["origin"] == "auto.ai.langgraph" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" @pytest.mark.parametrize("graph_name", ["my_graph", None, ""]) def test_pregel_invoke_with_different_graph_names( - sentry_init, capture_events, graph_name + sentry_init, capture_items, graph_name ): """Test Pregel.invoke() with different graph name scenarios.""" sentry_init( @@ -478,7 +487,7 @@ def test_pregel_invoke_with_different_graph_names( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() if not graph_name: @@ -492,25 +501,27 @@ def original_invoke(self, *args, **kwargs): wrapped_invoke = _wrap_pregel_invoke(original_invoke) wrapped_invoke(pregel, {"messages": []}) - tx = events[0] + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] if graph_name and graph_name.strip(): - assert invoke_span["description"] == "invoke_agent my_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + assert invoke_span["name"] == "invoke_agent my_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name else: - assert invoke_span["description"] == "invoke_agent" - assert SPANDATA.GEN_AI_PIPELINE_NAME not in 
invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("data", {}) + assert invoke_span["name"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("attributes", {}) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) -def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_events): +def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. @@ -519,7 +530,7 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_events): integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -564,29 +575,29 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has usage data - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] # The usage should match the mock_usage values 
(aggregated across all calls) - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_events): +def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. @@ -595,7 +606,7 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_events): integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -643,29 +654,29 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has usage data - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert 
"gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_events): +def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). @@ -674,7 +685,7 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_e integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -730,23 +741,23 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - 
assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_events): +def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). @@ -755,7 +766,7 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -814,23 +825,23 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 -def 
test_pregel_invoke_span_includes_response_model(sentry_init, capture_events): +def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. @@ -839,7 +850,7 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_events) integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -884,23 +895,25 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has response model - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events): +def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. 
@@ -909,7 +922,7 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_events integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -957,23 +970,25 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span has response model - assert invoke_agent_span["description"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events): +def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -982,7 +997,7 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_events integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1040,22 +1055,24 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_events): +def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -1064,7 +1081,7 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_event integrations=[LanggraphIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1125,19 +1142,21 @@ async def run_test(): result = asyncio.run(run_test()) assert result is not None - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) def test_complex_message_parsing(): @@ -1187,14 +1206,14 @@ def test_complex_message_parsing(): assert result[2]["function_call"]["name"] == "search" -def test_extraction_functions_complex_scenario(sentry_init, capture_events): +def test_extraction_functions_complex_scenario(sentry_init, capture_items): """Test extraction functions with complex scenarios including multiple messages and edge cases.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") pregel = MockPregelInstance("complex_graph") test_state = {"messages": [MockMessage("Complex request", name="user")]} @@ -1235,21 +1254,23 @@ def original_invoke(self, *args, **kwargs): assert result is not None - tx = events[0] + spans = [item.payload for item in items if item.type == 
"span"] invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert response_text == "Final response" - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] import json - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): tool_calls_data = json.loads(tool_calls_data) @@ -1260,14 +1281,14 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[1]["function"]["name"] == "calculate" -def test_langgraph_message_role_mapping(sentry_init, capture_events): +def test_langgraph_message_role_mapping(sentry_init, capture_items): """Test that Langgraph integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock a langgraph message with mixed roles class MockMessage: @@ -1297,17 +1318,18 @@ def __init__(self, content, message_type="human"): ) wrapped_invoke(pregel, state_data) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created correctly - assert span["op"] == "gen_ai.invoke_agent" + assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" # If messages were captured, verify 
role mapping - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]: + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) # Find messages with specific content to verify role mapping ai_message = next( @@ -1331,7 +1353,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -def test_langgraph_message_truncation(sentry_init, capture_events): +def test_langgraph_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -1340,7 +1362,7 @@ def test_langgraph_message_truncation(sentry_init, capture_events): traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1365,23 +1387,25 @@ def original_invoke(self, *args, **kwargs): result = wrapped_invoke(pregel, test_state) assert result is not None - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + spans = [item.payload for item in items if item.type == "span"] invoke_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT ] assert len(invoke_spans) > 0 invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + + (tx,) = (item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From 6f7a0547707a4ed22b8e99fce7c3c948d7ca74c1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 10:52:20 +0200 Subject: [PATCH 17/84] accept any as sdk version --- .../huggingface_hub/test_huggingface_hub.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 6b4402bc52..98abbb00fa 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -596,7 +596,7 @@ def test_text_generation_streaming( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", 
+ "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -673,7 +673,7 @@ def test_chat_completion( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -750,7 +750,7 @@ def test_chat_completion_streaming( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -825,7 +825,7 @@ def test_chat_completion_api_error( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -939,7 +939,7 @@ def test_chat_completion_with_tools( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, @@ -1035,7 +1035,7 @@ def test_chat_completion_streaming_with_tools( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, From 4f871a422c8e6b69abe5160e3629b84550b46f26 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 12:46:10 +0200 Subject: [PATCH 18/84] pydantic-ai tests --- .../pydantic_ai/test_pydantic_ai.py | 695 ++++++++++-------- 1 file changed, 369 insertions(+), 326 
deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 50ce155f5b..fe34dd0f5d 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -53,7 +53,7 @@ def inner(): @pytest.mark.asyncio -async def test_agent_run_async(sentry_init, capture_events, get_test_agent): +async def test_agent_run_async(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for async agent runs. """ @@ -63,7 +63,7 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = await test_agent.run("Test input") @@ -71,8 +71,7 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): assert result is not None assert result.output is not None - (transaction,) = events - spans = transaction["spans"] + (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -81,28 +80,31 @@ async def test_agent_run_async(sentry_init, capture_events, get_test_agent): # The transaction itself should have invoke_agent data assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Check chat span chat_span = chat_spans[0] - assert "chat" in chat_span["description"] - assert chat_span["data"]["gen_ai.operation.name"] == "chat" - assert 
chat_span["data"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["data"] - assert "gen_ai.usage.input_tokens" in chat_span["data"] - assert "gen_ai.usage.output_tokens" in chat_span["data"] + assert "chat" in chat_span["name"] + assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + assert "gen_ai.usage.output_tokens" in chat_span["attributes"] @pytest.mark.asyncio -async def test_agent_run_async_model_error(sentry_init, capture_events): +async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction", "span") def failing_model(messages, info): raise RuntimeError("model exploded") @@ -115,17 +117,17 @@ def failing_model(messages, info): with pytest.raises(RuntimeError, match="model exploded"): await agent.run("Test input") - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["status"] == "internal_error" + assert spans[0]["status"] == "error" @pytest.mark.asyncio -async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_agent): +async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_agent): """ Test that the invoke_agent span includes token usage and model data. 
""" @@ -135,7 +137,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = await test_agent.run("Test input") @@ -143,8 +145,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ assert result is not None assert result.output is not None - (transaction,) = events - + (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -170,7 +171,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_events, get_test_ assert trace_data["gen_ai.response.model"] == "test" # Test model name -def test_agent_run_sync(sentry_init, capture_events, get_test_agent): +def test_agent_run_sync(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for sync agent runs. 
""" @@ -180,7 +181,7 @@ def test_agent_run_sync(sentry_init, capture_events, get_test_agent): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() result = test_agent.run_sync("Test input") @@ -188,29 +189,31 @@ def test_agent_run_sync(sentry_init, capture_events, get_test_agent): assert result is not None assert result.output is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Verify streaming flag is False for sync for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is False + assert chat_span["attributes"]["gen_ai.response.streaming"] is False -def test_agent_run_sync_model_error(sentry_init, capture_events): +def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction", "span") def failing_model(messages, info): raise RuntimeError("model exploded") @@ -223,17 +226,17 @@ def failing_model(messages, info): with pytest.raises(RuntimeError, match="model exploded"): agent.run_sync("Test input") - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["status"] == 
"internal_error" + assert spans[0]["status"] == "error" @pytest.mark.asyncio -async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): +async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): """ Test that the integration creates spans for streaming agent runs. """ @@ -243,7 +246,7 @@ async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() async with test_agent.run_stream("Test input") as result: @@ -251,31 +254,33 @@ async def test_agent_run_stream(sentry_init, capture_events, get_test_agent): async for _ in result.stream_output(): pass - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["data"] - assert "gen_ai.usage.input_tokens" in chat_span["data"] + assert chat_span["attributes"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] # Streaming responses should still have output data assert ( - "gen_ai.response.text" in chat_span["data"] - or "gen_ai.response.model" in chat_span["data"] + "gen_ai.response.text" in chat_span["attributes"] + or "gen_ai.response.model" in 
chat_span["attributes"] ) @pytest.mark.asyncio -async def test_agent_run_stream_events(sentry_init, capture_events, get_test_agent): +async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agent): """ Test that run_stream_events creates spans (it uses run internally, so non-streaming). """ @@ -285,30 +290,31 @@ async def test_agent_run_stream_events(sentry_init, capture_events, get_test_age send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Consume all events test_agent = get_test_agent() async for _ in test_agent.run_stream_events("Test input"): pass - (transaction,) = events - # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_agent" # Find chat spans - spans = transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is False + assert chat_span["attributes"]["gen_ai.response.streaming"] is False @pytest.mark.asyncio -async def test_agent_with_tools(sentry_init, capture_events, get_test_agent): +async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): """ Test that tool execution creates execute_tool spans. 
""" @@ -325,34 +331,39 @@ def add_numbers(a: int, b: int) -> int: """Add two numbers together.""" return a + b - events = capture_events() + items = capture_items("transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool span tool_span = tool_spans[0] - assert "execute_tool" in tool_span["description"] - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @@ -363,7 +374,7 @@ def add_numbers(a: int, b: int) -> int: ) 
@pytest.mark.asyncio async def test_agent_with_tool_model_retry( - sentry_init, capture_events, get_test_agent, handled_tool_call_exceptions + sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions ): """ Test that a handled exception is captured when a tool raises ModelRetry. @@ -391,47 +402,51 @@ def add_numbers(a: int, b: int) -> float: raise ModelRetry(message="Try again with the same arguments.") return a + b - events = capture_events() + items = capture_items("event", "transaction", "span") result = await test_agent.run("What is 5 + 3?") assert result is not None if handled_tool_call_exceptions: - (error, transaction) = events - else: - (transaction,) = events - spans = transaction["spans"] - - if handled_tool_call_exceptions: + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] + spans = [item.payload for item in items if item.type == "span"] # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool spans model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["description"] - assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["data"] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + ) + assert 
model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] tool_span = tool_spans[1] - assert "execute_tool" in tool_span["description"] - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] # Check chat spans have available_tools for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @@ -442,7 +457,7 @@ def add_numbers(a: int, b: int) -> float: ) @pytest.mark.asyncio async def test_agent_with_tool_validation_error( - sentry_init, capture_events, get_test_agent, handled_tool_call_exceptions + sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions ): """ Test that a handled exception is captured when a tool has unsatisfiable constraints. 
@@ -464,7 +479,7 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: """Add two numbers together.""" return a + b - events = capture_events() + items = capture_items("event", "transaction", "span") result = None with pytest.raises(UnexpectedModelBehavior): @@ -473,42 +488,45 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: assert result is None if handled_tool_call_exceptions: - (error, model_behaviour_error, transaction) = events - else: ( + error, model_behaviour_error, - transaction, - ) = events - spans = transaction["spans"] - - if handled_tool_call_exceptions: + ) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" assert error["exception"]["values"][0]["mechanism"]["handled"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Check tool spans model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["description"] - assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["data"] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] # Check chat spans have available_tools for 
chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["data"] - available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] # Available tools is serialized as a string assert "add_numbers" in available_tools_str @pytest.mark.asyncio -async def test_agent_with_tools_streaming(sentry_init, capture_events, get_test_agent): +async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_agent): """ Test that tool execution works correctly with streaming. """ @@ -525,37 +543,40 @@ def multiply(a: int, b: int) -> int: """Multiply two numbers.""" return a * b - events = capture_events() + items = capture_items("transaction", "span") async with test_agent.run_stream("What is 7 times 8?") as result: async for _ in result.stream_output(): pass - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find span types - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # Should have tool spans assert len(tool_spans) >= 1 # Verify streaming flag is True for chat_span in chat_spans: - assert chat_span["data"]["gen_ai.response.streaming"] is True + assert chat_span["attributes"]["gen_ai.response.streaming"] is True # Check tool span tool_span = tool_spans[0] - assert tool_span["data"]["gen_ai.tool.name"] == "multiply" - assert "gen_ai.tool.input" in tool_span["data"] - assert "gen_ai.tool.output" in tool_span["data"] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in 
tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] @pytest.mark.asyncio -async def test_model_settings( - sentry_init, capture_events, get_test_agent_with_settings -): +async def test_model_settings(sentry_init, capture_items, get_test_agent_with_settings): """ Test that model settings are captured in spans. """ @@ -564,23 +585,24 @@ async def test_model_settings( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent_with_settings = get_test_agent_with_settings() await test_agent_with_settings.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find chat span - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] # Check that model settings are captured - assert chat_span["data"].get("gen_ai.request.temperature") == 0.7 - assert chat_span["data"].get("gen_ai.request.max_tokens") == 100 - assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 + assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 @pytest.mark.asyncio @@ -594,7 +616,7 @@ async def test_model_settings( ], ) async def test_system_prompt_attribute( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """ Test that system prompts are included as the first message. 
@@ -611,21 +633,24 @@ async def test_system_prompt_attribute( send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Hello") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] if send_default_pii and include_prompts: - system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] assert json.loads(system_instructions) == [ { "type": "text", @@ -633,11 +658,11 @@ async def test_system_prompt_attribute( } ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_events): +async def test_error_handling(sentry_init, capture_items): """ Test error handling in agent execution. 
""" @@ -653,14 +678,13 @@ async def test_error_handling(sentry_init, capture_events): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") # Simple run that should succeed await agent.run("Hello") # At minimum, we should have a transaction - assert len(events) >= 1 - transaction = [e for e in events if e.get("type") == "transaction"][0] + transaction = next(item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_error" # Transaction should complete successfully (status key may not exist if no error) trace_status = transaction["contexts"]["trace"].get("status") @@ -668,7 +692,7 @@ async def test_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_without_pii(sentry_init, capture_events, get_test_agent): +async def test_without_pii(sentry_init, capture_items, get_test_agent): """ Test that PII is not captured when send_default_pii is False. """ @@ -678,25 +702,26 @@ async def test_without_pii(sentry_init, capture_events, get_test_agent): send_default_pii=False, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages and response text are not captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_without_pii_tools(sentry_init, 
capture_events, get_test_agent): +async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): """ Test that tool input/output are not captured when send_default_pii is False. """ @@ -713,24 +738,27 @@ def sensitive_tool(data: str) -> str: """A tool with sensitive data.""" return f"Processed: {data}" - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use sensitive tool with private data") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find tool spans - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # If tool was executed, verify input/output are not captured for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] @pytest.mark.asyncio -async def test_multiple_agents_concurrent(sentry_init, capture_events, get_test_agent): +async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_agent): """ Test that multiple agents can run concurrently without interfering. 
""" @@ -739,7 +767,7 @@ async def test_multiple_agents_concurrent(sentry_init, capture_events, get_test_ traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() @@ -750,18 +778,15 @@ async def run_agent(input_text): results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) assert len(results) == 3 - assert len(events) == 3 # Verify each transaction is separate + events = [item.payload for item in items if item.type == "transaction"] for i, transaction in enumerate(events): - assert transaction["type"] == "transaction" assert transaction["transaction"] == "invoke_agent test_agent" - # Each should have its own spans - assert len(transaction["spans"]) >= 1 @pytest.mark.asyncio -async def test_message_history(sentry_init, capture_events): +async def test_message_history(sentry_init, capture_items): """ Test that full conversation history is captured in chat spans. """ @@ -776,7 +801,7 @@ async def test_message_history(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # First message await agent.run("Hello, I'm Alice") @@ -797,23 +822,26 @@ async def test_message_history(sentry_init, capture_events): await agent.run("What is my name?", message_history=history) # We should have 2 transactions + events = [item.payload for item in items if item.type == "transaction"] assert len(events) >= 2 # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] if chat_spans: chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_data = chat_span["data"]["gen_ai.request.messages"] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = 
chat_span["attributes"]["gen_ai.request.messages"] # Should have multiple messages including history assert len(messages_data) > 1 @pytest.mark.asyncio -async def test_gen_ai_system(sentry_init, capture_events, get_test_agent): +async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): """ Test that gen_ai.system is set from the model. """ @@ -822,26 +850,27 @@ async def test_gen_ai_system(sentry_init, capture_events, get_test_agent): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find chat span - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] # gen_ai.system should be set from the model (TestModel -> 'test') - assert "gen_ai.system" in chat_span["data"] - assert chat_span["data"]["gen_ai.system"] == "test" + assert "gen_ai.system" in chat_span["attributes"] + assert chat_span["attributes"]["gen_ai.system"] == "test" @pytest.mark.asyncio -async def test_include_prompts_false(sentry_init, capture_events, get_test_agent): +async def test_include_prompts_false(sentry_init, capture_items, get_test_agent): """ Test that prompts are not captured when include_prompts=False. 
""" @@ -851,25 +880,26 @@ async def test_include_prompts_false(sentry_init, capture_events, get_test_agent send_default_pii=True, # Even with PII enabled, prompts should not be captured ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Sensitive prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages and response text are not captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_include_prompts_true(sentry_init, capture_events, get_test_agent): +async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): """ Test that prompts are captured when include_prompts=True (default). 
""" @@ -879,26 +909,27 @@ async def test_include_prompts_true(sentry_init, capture_events, get_test_agent) send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Verify that messages are captured in chat spans assert len(chat_spans) >= 1 for chat_span in chat_spans: - assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.request.messages" in chat_span["attributes"] @pytest.mark.asyncio async def test_include_prompts_false_with_tools( - sentry_init, capture_events, get_test_agent + sentry_init, capture_items, get_test_agent ): """ Test that tool input/output are not captured when include_prompts=False. 
@@ -916,26 +947,27 @@ def test_tool(value: int) -> int: """A test tool.""" return value * 2 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use the test tool with value 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find tool spans - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] # If tool was executed, verify input/output are not captured for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] @pytest.mark.asyncio -async def test_include_prompts_requires_pii( - sentry_init, capture_events, get_test_agent -): +async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test_agent): """ Test that include_prompts requires send_default_pii=True. 
""" @@ -945,25 +977,26 @@ async def test_include_prompts_requires_pii( send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("transaction", "span") test_agent = get_test_agent() await test_agent.run("Test prompt") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Even with include_prompts=True, if PII is disabled, messages should not be captured for span in chat_spans: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] @pytest.mark.asyncio -async def test_mcp_tool_execution_spans(sentry_init, capture_events): +async def test_mcp_tool_execution_spans(sentry_init, capture_items): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1035,12 +1068,10 @@ async def mock_map_tool_result_part(part): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Simulate MCP tool execution within a transaction through CombinedToolset - with sentry_sdk.start_transaction( - op="ai.run", name="invoke_agent test_mcp_agent" - ) as transaction: + with sentry_sdk.start_transaction(op="ai.run", name="invoke_agent test_mcp_agent"): # Set up the agent context scope = sentry_sdk.get_current_scope() scope._contexts["pydantic_ai_agent"] = { @@ -1080,13 +1111,10 @@ async def mock_map_tool_result_part(part): # MCP tool might raise if not fully mocked, that's okay pass - events_list = events + events_list = items if len(events_list) == 0: pytest.skip("No events captured, MCP test setup incomplete") - (transaction,) = events_list - transaction["spans"] - # Note: This test manually calls combined.call_tool which doesn't go through # ToolManager._call_tool (which is what the integration patches). # In real-world usage, MCP tools are called through agent.run() which uses ToolManager. @@ -1256,7 +1284,7 @@ async def run_and_check_context(agent, agent_name): @pytest.mark.asyncio -async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): +async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): """ Test that invoke_agent span handles list user prompts correctly. 
""" @@ -1271,15 +1299,14 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Use a list as user prompt await agent.run(["First part", "Second part"]) - (transaction,) = events - # Check that the invoke_agent transaction has messages data # The invoke_agent is the transaction itself + (transaction,) = [item.payload for item in items if item.type == "transaction"] if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: messages_str = transaction["contexts"]["trace"]["data"][ "gen_ai.request.messages" @@ -1299,7 +1326,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events): ], ) async def test_invoke_agent_with_instructions( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """ Test that invoke_agent span handles instructions correctly. 
@@ -1322,31 +1349,34 @@ async def test_invoke_agent_with_instructions( send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] if send_default_pii and include_prompts: - system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] assert json.loads(system_instructions) == [ {"type": "text", "content": "System prompt"}, {"type": "text", "content": "Instruction 1\nInstruction 2"}, ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] @pytest.mark.asyncio -async def test_model_name_extraction_with_callable(sentry_init, capture_events): +async def test_model_name_extraction_with_callable(sentry_init, capture_items): """ Test model name extraction when model has a callable name() method. """ @@ -1372,7 +1402,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_events): @pytest.mark.asyncio -async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events): +async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items): """ Test model name extraction falls back to str() when no name attribute exists. 
""" @@ -1399,7 +1429,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_events @pytest.mark.asyncio -async def test_model_settings_object_style(sentry_init, capture_events): +async def test_model_settings_object_style(sentry_init, capture_items): """ Test that object-style model settings (non-dict) are handled correctly. """ @@ -1433,7 +1463,7 @@ async def test_model_settings_object_style(sentry_init, capture_events): @pytest.mark.asyncio -async def test_usage_data_partial(sentry_init, capture_events): +async def test_usage_data_partial(sentry_init, capture_items): """ Test that usage data is correctly handled when only some fields are present. """ @@ -1447,14 +1477,15 @@ async def test_usage_data_partial(sentry_init, capture_events): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 # Check that usage data fields exist (they may or may not be set depending on TestModel) @@ -1464,7 +1495,7 @@ async def test_usage_data_partial(sentry_init, capture_events): @pytest.mark.asyncio -async def test_agent_data_from_scope(sentry_init, capture_events): +async def test_agent_data_from_scope(sentry_init, capture_items): """ Test that agent data can be retrieved from Sentry scope when not passed directly. 
"""
@@ -1479,20 +1510,19 @@ async def test_agent_data_from_scope(
         traces_sample_rate=1.0,
     )
 
-    events = capture_events()
+    items = capture_items("transaction", "span")
 
     # The integration automatically sets agent in scope during execution
     await agent.run("Test input")
 
-    (transaction,) = events
-
-    # Verify agent name is captured
+    # Verify agent name is captured
+    (transaction,) = (item.payload for item in items if item.type == "transaction")
     assert transaction["transaction"] == "invoke_agent test_scope_agent"
 
 
 @pytest.mark.asyncio
 async def test_available_tools_without_description(
-    sentry_init, capture_events, get_test_agent
+    sentry_init, capture_items, get_test_agent
 ):
     """
     Test that available tools are captured even when description is missing.
@@ -1509,23 +1539,24 @@ def tool_without_desc(x: int) -> int:
         # No docstring = no description
         return x * 2
 
-    events = capture_events()
+    items = capture_items("transaction", "span")
 
     await test_agent.run("Use the tool with 5")
 
-    (transaction,) = events
-    spans = transaction["spans"]
+    spans = [item.payload for item in items if item.type == "span"]
 
-    chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"]
+    chat_spans = [
+        s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat"
+    ]
 
     if chat_spans:
         chat_span = chat_spans[0]
-        if "gen_ai.request.available_tools" in chat_span["data"]:
-            tools_str = chat_span["data"]["gen_ai.request.available_tools"]
+        if "gen_ai.request.available_tools" in chat_span["attributes"]:
+            tools_str = chat_span["attributes"]["gen_ai.request.available_tools"]
             assert "tool_without_desc" in tools_str
 
 
 @pytest.mark.asyncio
-async def test_output_with_tool_calls(sentry_init, capture_events, get_test_agent):
+async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent):
     """
     Test that tool calls in model response are captured correctly.
""" @@ -1542,14 +1573,15 @@ def calc_tool(value: int) -> int: """Calculate something.""" return value + 10 - events = capture_events() + items = capture_items("transaction", "span") await test_agent.run("Use calc_tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # At least one chat span should exist assert len(chat_spans) >= 1 @@ -1558,11 +1590,11 @@ def calc_tool(value: int) -> int: for chat_span in chat_spans: # Tool calls may or may not be in response depending on TestModel behavior # Just verify the span was created and has basic data - assert "gen_ai.operation.name" in chat_span["data"] + assert "gen_ai.operation.name" in chat_span["attributes"] @pytest.mark.asyncio -async def test_message_formatting_with_different_parts(sentry_init, capture_events): +async def test_message_formatting_with_different_parts(sentry_init, capture_items): """ Test that different message part types are handled correctly in ai_client span. 
""" @@ -1579,7 +1611,7 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_even send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Create message history with different part types history = [ @@ -1594,24 +1626,25 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_even await agent.run("What did I say?", message_history=history) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Should have chat spans assert len(chat_spans) >= 1 # Check that messages are captured chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_data = chat_span["data"]["gen_ai.request.messages"] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] # Should contain message history assert messages_data is not None @pytest.mark.asyncio -async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_events): +async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_items): """ Test that update_invoke_agent_span handles None output gracefully. """ @@ -1639,7 +1672,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_ev @pytest.mark.asyncio -async def test_update_ai_client_span_with_none_response(sentry_init, capture_events): +async def test_update_ai_client_span_with_none_response(sentry_init, capture_items): """ Test that update_ai_client_span handles None response gracefully. 
""" @@ -1666,7 +1699,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_eve @pytest.mark.asyncio -async def test_agent_without_name(sentry_init, capture_events): +async def test_agent_without_name(sentry_init, capture_items): """ Test that agent without a name is handled correctly. """ @@ -1678,20 +1711,18 @@ async def test_agent_without_name(sentry_init, capture_events): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") await agent.run("Test input") - (transaction,) = events - # Should still create transaction, just with default name - assert transaction["type"] == "transaction" + (transaction,) = (item.payload for item in items if item.type == "transaction") # Transaction name should be "invoke_agent agent" or similar default assert "invoke_agent" in transaction["transaction"] @pytest.mark.asyncio -async def test_model_response_without_parts(sentry_init, capture_events): +async def test_model_response_without_parts(sentry_init, capture_items): """ Test handling of model response without parts attribute. """ @@ -1723,7 +1754,7 @@ async def test_model_response_without_parts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_input_messages_error_handling(sentry_init, capture_events): +async def test_input_messages_error_handling(sentry_init, capture_items): """ Test that _set_input_messages handles errors gracefully. """ @@ -1751,7 +1782,7 @@ async def test_input_messages_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_available_tools_error_handling(sentry_init, capture_events): +async def test_available_tools_error_handling(sentry_init, capture_items): """ Test that _set_available_tools handles errors gracefully. 
""" @@ -1781,7 +1812,7 @@ async def test_available_tools_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_usage_data_with_none_usage(sentry_init, capture_events): +async def test_set_usage_data_with_none_usage(sentry_init, capture_items): """ Test that _set_usage_data handles None usage gracefully. """ @@ -1806,7 +1837,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): +async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): """ Test that _set_usage_data handles usage with only some fields. """ @@ -1838,7 +1869,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_parts_with_tool_return(sentry_init, capture_events): +async def test_message_parts_with_tool_return(sentry_init, capture_items): """ Test that ToolReturnPart messages are handled correctly. """ @@ -1860,22 +1891,23 @@ def test_tool(x: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") # Run with history containing tool return await agent.run("Use test_tool with 5") - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] # Should have chat spans assert len(chat_spans) >= 1 @pytest.mark.asyncio -async def test_message_parts_with_list_content(sentry_init, capture_events): +async def test_message_parts_with_list_content(sentry_init, capture_items): """ Test that message parts with list content are handled correctly. 
""" @@ -1910,7 +1942,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_events): @pytest.mark.asyncio -async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events): +async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): """ Test that _set_output_data handles both text and tool calls in response. """ @@ -1949,7 +1981,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_events) @pytest.mark.asyncio -async def test_output_data_error_handling(sentry_init, capture_events): +async def test_output_data_error_handling(sentry_init, capture_items): """ Test that _set_output_data handles errors in formatting gracefully. """ @@ -1981,7 +2013,7 @@ async def test_output_data_error_handling(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_with_system_prompt_part(sentry_init, capture_events): +async def test_message_with_system_prompt_part(sentry_init, capture_items): """ Test that SystemPromptPart is handled with correct role. """ @@ -2017,7 +2049,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_events): @pytest.mark.asyncio -async def test_message_with_instructions(sentry_init, capture_events): +async def test_message_with_instructions(sentry_init, capture_items): """ Test that messages with instructions field are handled correctly. """ @@ -2052,7 +2084,7 @@ async def test_message_with_instructions(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_input_messages_without_prompts(sentry_init, capture_events): +async def test_set_input_messages_without_prompts(sentry_init, capture_items): """ Test that _set_input_messages respects _should_send_prompts(). 
""" @@ -2078,7 +2110,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_output_data_without_prompts(sentry_init, capture_events): +async def test_set_output_data_without_prompts(sentry_init, capture_items): """ Test that _set_output_data respects _should_send_prompts(). """ @@ -2107,7 +2139,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_get_model_name_with_exception_in_callable(sentry_init, capture_events): +async def test_get_model_name_with_exception_in_callable(sentry_init, capture_items): """ Test that _get_model_name handles exceptions in name() callable. """ @@ -2131,7 +2163,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_ev @pytest.mark.asyncio -async def test_get_model_name_with_string_model(sentry_init, capture_events): +async def test_get_model_name_with_string_model(sentry_init, capture_items): """ Test that _get_model_name handles string models. """ @@ -2150,7 +2182,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_events): @pytest.mark.asyncio -async def test_get_model_name_with_none(sentry_init, capture_events): +async def test_get_model_name_with_none(sentry_init, capture_items): """ Test that _get_model_name handles None model. """ @@ -2169,7 +2201,7 @@ async def test_get_model_name_with_none(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_with_system(sentry_init, capture_events): +async def test_set_model_data_with_system(sentry_init, capture_items): """ Test that _set_model_data captures system from model. 
""" @@ -2200,7 +2232,7 @@ async def test_set_model_data_with_system(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_from_agent_scope(sentry_init, capture_events): +async def test_set_model_data_from_agent_scope(sentry_init, capture_items): """ Test that _set_model_data retrieves model from agent in scope when not passed. """ @@ -2234,7 +2266,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_model_data_with_none_settings_values(sentry_init, capture_events): +async def test_set_model_data_with_none_settings_values(sentry_init, capture_items): """ Test that _set_model_data skips None values in settings. """ @@ -2266,7 +2298,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_eve @pytest.mark.asyncio -async def test_should_send_prompts_without_pii(sentry_init, capture_events): +async def test_should_send_prompts_without_pii(sentry_init, capture_items): """ Test that _should_send_prompts returns False when PII disabled. """ @@ -2284,7 +2316,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_without_agent(sentry_init, capture_events): +async def test_set_agent_data_without_agent(sentry_init, capture_items): """ Test that _set_agent_data handles None agent gracefully. """ @@ -2309,7 +2341,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_from_scope(sentry_init, capture_events): +async def test_set_agent_data_from_scope(sentry_init, capture_items): """ Test that _set_agent_data retrieves agent from scope when not passed. 
""" @@ -2341,7 +2373,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_agent_data_without_name(sentry_init, capture_events): +async def test_set_agent_data_without_name(sentry_init, capture_items): """ Test that _set_agent_data handles agent without name attribute. """ @@ -2371,7 +2403,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_available_tools_without_toolset(sentry_init, capture_events): +async def test_set_available_tools_without_toolset(sentry_init, capture_items): """ Test that _set_available_tools handles agent without toolset. """ @@ -2401,7 +2433,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_events): @pytest.mark.asyncio -async def test_set_available_tools_with_schema(sentry_init, capture_events): +async def test_set_available_tools_with_schema(sentry_init, capture_items): """ Test that _set_available_tools extracts tool schema correctly. """ @@ -2437,7 +2469,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_creation(sentry_init, capture_events): +async def test_execute_tool_span_creation(sentry_init, capture_items): """ Test direct creation of execute_tool span. """ @@ -2464,7 +2496,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): +async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): """ Test execute_tool span with MCP tool type. 
""" @@ -2490,7 +2522,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_without_prompts(sentry_init, capture_events): +async def test_execute_tool_span_without_prompts(sentry_init, capture_items): """ Test that execute_tool span respects _should_send_prompts(). """ @@ -2517,7 +2549,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_events): @pytest.mark.asyncio -async def test_execute_tool_span_with_none_args(sentry_init, capture_events): +async def test_execute_tool_span_with_none_args(sentry_init, capture_items): """ Test execute_tool span with None args. """ @@ -2540,7 +2572,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_events): @pytest.mark.asyncio -async def test_update_execute_tool_span_with_none_span(sentry_init, capture_events): +async def test_update_execute_tool_span_with_none_span(sentry_init, capture_items): """ Test that update_execute_tool_span handles None span gracefully. """ @@ -2561,7 +2593,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_even @pytest.mark.asyncio -async def test_update_execute_tool_span_with_none_result(sentry_init, capture_events): +async def test_update_execute_tool_span_with_none_result(sentry_init, capture_items): """ Test that update_execute_tool_span handles None result gracefully. """ @@ -2588,7 +2620,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_ev @pytest.mark.asyncio -async def test_tool_execution_without_span_context(sentry_init, capture_events): +async def test_tool_execution_without_span_context(sentry_init, capture_items): """ Test that tool execution patch handles case when no span context exists. This tests the code path where current_span is None in _patch_tool_execution. 
@@ -2617,7 +2649,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_events): @pytest.mark.asyncio -async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_events): +async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_items): """ Test that invoke_agent_span skips callable instructions correctly. """ @@ -2650,7 +2682,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ @pytest.mark.asyncio -async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_events): +async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_items): """ Test that invoke_agent_span handles string instructions (not list). """ @@ -2680,7 +2712,7 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_e @pytest.mark.asyncio -async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): +async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): """ Test that ai_client_span reads streaming flag from scope. """ @@ -2706,7 +2738,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_events): @pytest.mark.asyncio -async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_events): +async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): """ Test that ai_client_span gets agent from scope when not passed. 
""" @@ -2759,7 +2791,7 @@ def _find_binary_content(messages_data, expected_modality, expected_mime_type): @pytest.mark.asyncio -async def test_binary_content_encoding_image(sentry_init, capture_events): +async def test_binary_content_encoding_image(sentry_init, capture_items): """Test that BinaryContent with image data is properly encoded in messages.""" sentry_init( integrations=[PydanticAIIntegration()], @@ -2767,7 +2799,7 @@ async def test_binary_content_encoding_image(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2782,14 +2814,14 @@ async def test_binary_content_encoding_image(sentry_init, capture_events): _set_input_messages(span, [mock_msg]) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) assert _find_binary_content(messages_data, "image", "image/png") @pytest.mark.asyncio -async def test_binary_content_encoding_mixed_content(sentry_init, capture_events): +async def test_binary_content_encoding_mixed_content(sentry_init, capture_items): """Test that BinaryContent mixed with text content is properly handled.""" sentry_init( integrations=[PydanticAIIntegration()], @@ -2797,7 +2829,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2814,7 +2846,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events _set_input_messages(span, [mock_msg]) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") span_data = 
event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) @@ -2830,7 +2862,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_events @pytest.mark.asyncio -async def test_binary_content_in_agent_run(sentry_init, capture_events): +async def test_binary_content_in_agent_run(sentry_init, capture_items): """Test that BinaryContent in actual agent run is properly captured in spans.""" agent = Agent("test", name="test_binary_agent") @@ -2840,28 +2872,30 @@ async def test_binary_content_in_agent_run(sentry_init, capture_events): send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") binary_content = BinaryContent( data=b"fake_image_data_for_testing", media_type="image/png" ) await agent.run(["Analyze this image:", binary_content]) - (transaction,) = events - chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["data"]: - messages_str = str(chat_span["data"]["gen_ai.request.messages"]) + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) assert any(keyword in messages_str for keyword in ["blob", "image", "base64"]) @pytest.mark.asyncio -async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): +async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): span = sentry_sdk.start_span(op="test_span") @@ -2874,7 +2908,7 
@@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): _set_usage_data(span, usage) span.finish() - (event,) = events + (event,) = (item.payload for item in items if item.type == "transaction") (span_data,) = event["spans"] assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 @@ -2922,7 +2956,7 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): ], ) def test_image_url_base64_content_in_span( - sentry_init, capture_events, url, image_url_kwargs, expected_content + sentry_init, capture_items, url, image_url_kwargs, expected_content ): from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span @@ -2932,7 +2966,7 @@ def test_image_url_base64_content_in_span( send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") with sentry_sdk.start_transaction(op="test", name="test"): image_url = ImageUrl(url=url, **image_url_kwargs) @@ -2944,10 +2978,12 @@ def test_image_url_base64_content_in_span( span = ai_client_span([mock_msg], None, None, None) span.finish() - (event,) = events - chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] assert len(chat_spans) >= 1 - messages_data = _get_messages_from_span(chat_spans[0]["data"]) + messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) found_image = False for msg in messages_data: @@ -2992,7 +3028,7 @@ def test_image_url_base64_content_in_span( ], ) async def test_invoke_agent_image_url( - sentry_init, capture_events, url, image_url_kwargs, expected_content + sentry_init, capture_items, url, image_url_kwargs, expected_content ): sentry_init( integrations=[PydanticAIIntegration()], @@ -3002,17 +3038,18 @@ async def 
test_invoke_agent_image_url( agent = Agent("test", name="test_image_url_agent") - events = capture_events() + items = capture_items("transaction", "span") image_url = ImageUrl(url=url, **image_url_kwargs) await agent.run([image_url, "Describe this image"]) - (transaction,) = events - found_image = False - chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] for chat_span in chat_spans: - messages_data = _get_messages_from_span(chat_span["data"]) + messages_data = _get_messages_from_span(chat_span["attributes"]) for msg in messages_data: if "content" not in msg: continue @@ -3025,7 +3062,7 @@ async def test_invoke_agent_image_url( @pytest.mark.asyncio -async def test_tool_description_in_execute_tool_span(sentry_init, capture_events): +async def test_tool_description_in_execute_tool_span(sentry_init, capture_items): """ Test that tool description from the tool's docstring is included in execute_tool spans. 
""" @@ -3046,18 +3083,24 @@ def multiply_numbers(a: int, b: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agent.run("What is 5 times 3?") assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] - tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] assert len(tool_spans) >= 1 tool_span = tool_spans[0] - assert tool_span["data"]["gen_ai.tool.name"] == "multiply_numbers" - assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["data"] - assert "Multiply two numbers" in tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] + assert ( + "Multiply two numbers" + in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) From 7befc7d3863593c0414d437e59f7591ac4334cf5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:03:38 +0200 Subject: [PATCH 19/84] . 
--- sentry_sdk/client.py | 16 ++++++++-- tests/tracing/test_decorator.py | 53 ++++++++++++++++++++++++--------- tests/tracing/test_misc.py | 8 ++--- 3 files changed, 56 insertions(+), 21 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index c6df2f564b..99e58ec499 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -245,9 +245,15 @@ def _serialized_v1_span_to_serialized_v2_span( res["attributes"] = {} for key, value in attributes.items(): - res["attributes"][key] = _serialized_v1_attribute_to_serialized_v2_attribute( - value - ) + converted_value = _serialized_v1_attribute_to_serialized_v2_attribute(value) + if converted_value is None: + continue + + res["attributes"][key] = converted_value + + # Remove redundant attribute, as status is stored in the status field. + if "status" in res["attributes"]: + del res["attributes"]["status"] return res @@ -268,6 +274,10 @@ def _split_gen_ai_spans( non_gen_ai_spans = [] gen_ai_spans = [] for span in spans: + if not isinstance(span, dict): + non_gen_ai_spans.append(span) + continue + span_op = span.get("op") if isinstance(span_op, str) and span_op.startswith("gen_ai."): gen_ai_spans.append(span) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 15432f5862..e73323138a 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -121,9 +121,9 @@ async def _some_function_traced(a, b, c): ) -def test_span_templates_ai_dicts(sentry_init, capture_events): +def test_span_templates_ai_dicts(sentry_init, capture_items): sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): @@ -166,40 +166,57 @@ def my_agent(): with sentry_sdk.start_transaction(name="test-transaction"): my_agent() - (event,) = events - (agent_span, tool_span, chat_span) = event["spans"] + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if 
item.type == "span" + ) - assert agent_span["op"] == "gen_ai.invoke_agent" + assert agent_span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" assert ( - agent_span["description"] + agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" ) - assert agent_span["data"] == { + assert agent_span["attributes"] == { "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert tool_span["op"] == "gen_ai.execute_tool" + assert tool_span["attributes"]["sentry.op"] == "gen_ai.execute_tool" assert ( - tool_span["description"] + tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" ) - assert tool_span["data"] == { + assert tool_span["attributes"] == { "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", "gen_ai.operation.name": "execute_tool", "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 20, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.tool.description" not in tool_span["data"] + assert "gen_ai.tool.description" not in tool_span["attributes"] - assert chat_span["op"] == "gen_ai.chat" - assert chat_span["description"] == "chat my-gpt-4o-mini" - assert chat_span["data"] == { + assert chat_span["attributes"]["sentry.op"] == "gen_ai.chat" 
+ assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 100, @@ -213,6 +230,14 @@ def my_agent(): "gen_ai.usage.input_tokens": 11, "gen_ai.usage.output_tokens": 22, "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 8895c98dbc..f69e19791a 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -611,11 +611,11 @@ class TestConversationIdPropagation: """Tests for conversation_id propagation to AI spans.""" def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( - self, sentry_init, capture_events + self, sentry_init, capture_items ): """Span with gen_ai.operation.name data should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-op-name-test") @@ -624,8 +624,8 @@ def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( with start_span(op="http.client") as span: span.set_data("gen_ai.operation.name", "chat") - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-op-name-test" def test_conversation_id_propagates_to_span_with_ai_op( From fb348bb1037ce1350c714ad3da8ec7b77f79c350 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:12:59 +0200 Subject: [PATCH 20/84] openai-agents tests ---
.../openai_agents/test_openai_agents.py | 855 ++++++++++-------- 1 file changed, 470 insertions(+), 385 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 7310e86df5..1c4925915d 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -160,7 +160,7 @@ def test_agent_custom_model(): @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -184,7 +184,7 @@ async def test_agent_invocation_span_no_pii( send_default_pii=False, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -193,38 +193,44 @@ async def test_agent_invocation_span_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] - assert "gen_ai.request.messages" not in invoke_agent_span["data"] - assert "gen_ai.response.text" not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + 
assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 @pytest.mark.asyncio @@ -305,7 +311,7 @@ async def test_agent_invocation_span_no_pii( ) async def test_agent_invocation_span( sentry_init, - capture_events, + capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, instructions, @@ -335,7 +341,7 @@ async def test_agent_invocation_span( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, @@ -346,28 +352,34 @@ async def test_agent_invocation_span( assert result is not None 
assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" # Only first case checks "gen_ai.request.messages" until further input handling work. param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.request.messages"] == safe_serialize( + assert invoke_agent_span["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( [ {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, ] ) elif "string" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -376,13 +388,17 @@ async def test_agent_invocation_span( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert 
ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -392,13 +408,17 @@ async def test_agent_invocation_span( ] ) elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -408,14 +428,18 @@ async def test_agent_invocation_span( ] ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) elif "parts_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -426,14 +450,18 @@ async def test_agent_invocation_span( ] ) elif instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -445,32 +473,32 @@ async def 
test_agent_invocation_span( ) assert ( - invoke_agent_span["data"]["gen_ai.response.text"] + invoke_agent_span["attributes"]["gen_ai.response.text"] == "Hello, how can I help you?" ) - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + 
assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 @pytest.mark.asyncio async def test_client_span_custom_model( sentry_init, - capture_events, + capture_items, test_agent_custom_model, nonstreaming_responses_model_response, get_model_response, @@ -497,7 +525,7 @@ async def test_client_span_custom_model( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -506,17 +534,18 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["description"] == "chat my-custom-model" - assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["name"] == "chat my-custom-model" + assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" def test_agent_invocation_span_sync_no_pii( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -543,42 +572,48 @@ def test_agent_invocation_span_sync_no_pii( send_default_pii=False, ) - events = capture_events() + items = capture_items("span", "transaction") result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) assert result is not None assert result.final_output == "Hello, how can I help you?"
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert 
ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] @pytest.mark.parametrize( @@ -658,7 +693,7 @@ def test_agent_invocation_span_sync_no_pii( ) def test_agent_invocation_span_sync( sentry_init, - capture_events, + capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, instructions, @@ -688,7 +723,7 @@ def test_agent_invocation_span_sync( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = agents.Runner.run_sync( agent, @@ -699,36 +734,40 @@ def test_agent_invocation_span_sync( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert 
invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] elif "string" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -737,13 +776,17 @@ def test_agent_invocation_span_sync( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -753,13 +796,17 @@ def test_agent_invocation_span_sync( ] ) elif "blocks" in param_id and instructions is None: # type: ignore - assert 
ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) elif "blocks" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -769,14 +816,18 @@ def test_agent_invocation_span_sync( ] ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) elif "parts_no_type" in param_id: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -787,14 +838,18 @@ def test_agent_invocation_span_sync( ] ) elif instructions is None: # type: ignore - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"]["gen_ai.system_instructions"] == safe_serialize( + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( [ { "type": "text", @@ -807,7 +862,7 @@ def test_agent_invocation_span_sync( @pytest.mark.asyncio -async def test_handoff_span(sentry_init, capture_events, get_model_response): +async def test_handoff_span(sentry_init, capture_items, get_model_response): """ Test that handoff spans are created when agents hand off to 
other agents. """ @@ -910,7 +965,7 @@ async def test_handoff_span(sentry_init, capture_events, get_model_response): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") result = await agents.Runner.run( primary_agent, @@ -920,21 +975,22 @@ async def test_handoff_span(sentry_init, capture_events, get_model_response): assert result is not None - (transaction,) = events - spans = transaction["spans"] - handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) # Verify handoff span was created assert handoff_span is not None - assert ( - handoff_span["description"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" @pytest.mark.asyncio async def test_max_turns_before_handoff_span( - sentry_init, capture_events, get_model_response + sentry_init, capture_items, get_model_response ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. 
@@ -1038,7 +1094,7 @@ async def test_max_turns_before_handoff_span( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") with pytest.raises(MaxTurnsExceeded): await agents.Runner.run( @@ -1048,22 +1104,23 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - (error, transaction) = events - spans = transaction["spans"] - handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) # Verify handoff span was created assert handoff_span is not None - assert ( - handoff_span["description"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, @@ -1135,7 +1192,7 @@ def simple_test_tool(message: str) -> str: send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") await agents.Runner.run( agent_with_tool, @@ -1143,13 +1200,26 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == 
"span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) available_tool = { "name": "simple_test_tool", @@ -1189,39 +1259,36 @@ def simple_test_tool(message: str) -> str: } ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] + agent_span["attributes"]["gen_ai.request.available_tools"] )[0] assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert 
agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] + ai_client_span1["attributes"]["gen_ai.request.available_tools"] )[0] assert all( ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"]["gen_ai.request.messages"] == safe_serialize( [ { "role": "user", @@ -1231,14 +1298,14 @@ def simple_test_tool(message: str) -> str: }, ] ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + assert ai_client_span1["attributes"]["gen_ai.request.model"] 
== "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 tool_call = { "arguments": '{"message": "hello"}', @@ -1252,41 +1319,41 @@ def simple_test_tool(message: str) -> str: if OPENAI_VERSION >= (2, 25, 0): tool_call["namespace"] = None - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + assert json.loads(ai_client_span1["attributes"]["gen_ai.response.tool_calls"]) == [ tool_call ] - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] + tool_span["attributes"]["gen_ai.request.available_tools"] )[0] assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": 
"hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] + ai_client_span2["attributes"]["gen_ai.request.available_tools"] )[0] assert all( ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"]["gen_ai.request.messages"] == safe_serialize( [ { "role": "tool", @@ -1300,19 +1367,19 @@ def simple_test_tool(message: str) -> str: }, ] ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert 
ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 assert ( - ai_client_span2["data"]["gen_ai.response.text"] + ai_client_span2["attributes"]["gen_ai.response.text"] == "Task completed using the tool" ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio @@ -1570,7 +1637,7 @@ async def test_hosted_mcp_tool_propagation_headers( @pytest.mark.asyncio -async def test_model_behavior_error(sentry_init, capture_events, test_agent): +async def test_model_behavior_error(sentry_init, capture_items, test_agent): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. The mocked API response indicates that "wrong_tool" was called. 
@@ -1613,7 +1680,7 @@ def simple_test_tool(message: str) -> str: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") with pytest.raises(ModelBehaviorError): await agents.Runner.run( @@ -1622,26 +1689,27 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (error, transaction) = events - spans = transaction["spans"] + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] ( agent_span, ai_client_span1, ) = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" # Error due to unrecognized tool in model response. - assert agent_span["status"] == "internal_error" - assert agent_span["tags"]["status"] == "internal_error" + assert agent_span["status"] == "error" @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_events, test_agent): +async def test_error_handling(sentry_init, capture_items, test_agent): """ Test error handling in agent execution. 
""" @@ -1660,39 +1728,39 @@ async def test_error_handling(sentry_init, capture_events, test_agent): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("error", "span", "transaction") with pytest.raises(Exception, match="Model Error"): await agents.Runner.run( test_agent, "Test input", run_config=test_run_config ) - ( - error_event, - transaction, - ) = events - + error_events = [item.payload for item in items if item.type == "event"] + assert len(error_events) == 1 + error_event = error_events[0] assert error_event["exception"]["values"][0]["type"] == "Exception" assert error_event["exception"]["values"][0]["value"] == "Model Error" assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" - spans = transaction["spans"] - (invoke_agent_span, ai_client_span) = spans - + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" @pytest.mark.asyncio -async def test_error_captures_input_data(sentry_init, 
capture_events, test_agent): +async def test_error_captures_input_data(sentry_init, capture_items, test_agent): """ Test that input data is captured even when the API call raises an exception. This verifies that _set_input_data is called before the API call. @@ -1725,37 +1793,36 @@ async def test_error_captures_input_data(sentry_init, capture_events, test_agent send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "span") with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - ( - error_event, - transaction, - ) = events - + error_events = [item.payload for item in items if item.type == "event"] + assert len(error_events) == 1 + error_event = error_events[0] assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - spans = transaction["spans"] - ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ][0] - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["status"] == "error" - assert "gen_ai.request.messages" in ai_client_span["data"] + assert "gen_ai.request.messages" in ai_client_span["attributes"] request_messages = safe_serialize( [ {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) - assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages + assert ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages @pytest.mark.asyncio -async def test_span_status_error(sentry_init, capture_events, test_agent): +async def 
test_span_status_error(sentry_init, capture_items, test_agent): with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): with patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -1770,23 +1837,26 @@ async def test_span_status_error(sentry_init, capture_events, test_agent): traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction", "span") with pytest.raises(ValueError, match="Model Error"): await agents.Runner.run( test_agent, "Test input", run_config=test_run_config ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["status"] == "error" @pytest.mark.asyncio async def test_mcp_tool_execution_spans( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1880,7 +1950,7 @@ async def test_mcp_tool_execution_spans( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -1888,33 +1958,35 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": + if span.get("name") == "execute_tool test_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully" + mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" ) # Verify no error status since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + assert mcp_tool_span.get("status") != "error" + assert "tags" not in mcp_tool_span @pytest.mark.asyncio async def test_mcp_tool_execution_with_error( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP tool calls with errors are tracked with error status. 
@@ -2008,7 +2080,7 @@ async def test_mcp_tool_execution_with_error( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -2016,31 +2088,29 @@ async def test_mcp_tool_execution_with_error( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span with error mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool failing_mcp_tool": + if span.get("name") == "execute_tool failing_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created with error status assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["data"]["gen_ai.tool.output"] is None + assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] is None # Verify error status was set - assert mcp_tool_span["status"] == "internal_error" - assert mcp_tool_span["tags"]["status"] == "internal_error" + assert mcp_tool_span["status"] == "error" @pytest.mark.asyncio async def test_mcp_tool_execution_without_pii( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that MCP tool input/output are not included when send_default_pii is False. 
@@ -2134,7 +2204,7 @@ async def test_mcp_tool_execution_without_pii( send_default_pii=False, # PII disabled ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent, @@ -2142,30 +2212,29 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span mcp_tool_span = None for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": + if span.get("name") == "execute_tool test_mcp_tool": mcp_tool_span = span break # Verify the MCP tool span was created but without input/output assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -2192,7 +2261,7 @@ async def test_multiple_agents_asyncio( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") async def run(): await agents.Runner.run( @@ -2203,14 +2272,10 @@ async def run(): await asyncio.gather(*[run() for _ in range(3)]) - assert len(events) == 3 - txn1, txn2, txn3 = events + txn1, txn2, txn3 = (item.payload for item in items if item.type == 
"transaction") - assert txn1["type"] == "transaction" assert txn1["transaction"] == "test_agent workflow" - assert txn2["type"] == "transaction" assert txn2["transaction"] == "test_agent workflow" - assert txn3["type"] == "transaction" assert txn3["transaction"] == "test_agent workflow" @@ -2230,7 +2295,7 @@ async def run(): ], ) def test_openai_agents_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI Agents integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -2259,7 +2324,7 @@ def test_openai_agents_message_role_mapping( @pytest.mark.asyncio async def test_tool_execution_error_tracing( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, @@ -2338,7 +2403,7 @@ def failing_tool(message: str) -> str: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") # Note: The agents library catches tool exceptions internally, # so we don't expect this to raise @@ -2348,13 +2413,12 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] # Find the execute_tool span execute_tool_span = None for span in spans: - description = span.get("description", "") + description = span.get("name", "") if description is not None and description.startswith( "execute_tool failing_tool" ): @@ -2363,19 +2427,18 @@ def failing_tool(message: str) -> str: # Verify the execute_tool span was created assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["description"] == "execute_tool failing_tool" - assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert 
execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" # Verify error status was set (this is the key test for our patch) # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "internal_error" - assert execute_tool_span["tags"]["status"] == "internal_error" + assert execute_tool_span["status"] == "error" @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2437,7 +2500,7 @@ async def test_invoke_agent_span_includes_usage_data( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2445,29 +2508,30 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ) # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert 
invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2529,7 +2593,7 @@ async def test_ai_client_span_includes_response_model( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2537,20 +2601,21 @@ async def test_ai_client_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) # Verify ai_client span has response model from API response - assert ai_client_span["description"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( 
sentry_init, - capture_events, + capture_items, get_model_response, ): """ @@ -2616,7 +2681,7 @@ async def test_ai_client_span_response_model_with_chat_completions( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2624,18 +2689,22 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4o-mini-2024-07-18" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -2734,7 +2803,7 @@ def calculator(a: int, b: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent_with_tool, @@ -2744,25 +2813,24 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = spans[0] # Verify invoke_agent span has aggregated usage from both API calls # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output 
tokens, 15 + 35 = 50 total - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 # Cached tokens should be aggregated: 0 + 5 = 5 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 # Reasoning tokens should be aggregated: 0 + 3 = 3 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2823,7 +2891,7 @@ async def test_invoke_agent_span_includes_response_model( send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -2831,27 +2899,32 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify invoke_agent span has response model from API - assert 
invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["data"] - assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( sentry_init, - capture_events, + capture_items, test_agent, get_model_response, ): @@ -2952,7 +3025,7 @@ def calculator(a: int, b: int) -> int: send_default_pii=True, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent_with_tool, @@ -2962,24 +3035,26 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = spans[0] first_ai_client_span = spans[1] second_ai_client_span = spans[3] # After tool span # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) # Each ai_client span has its own response model from the API - assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert 
first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" assert ( - second_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" ) -def test_openai_agents_message_truncation(sentry_init, capture_events): +def test_openai_agents_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI Agents integration.""" large_content = ( @@ -3230,7 +3305,7 @@ async def test_streaming_ttft_on_chat_span( @pytest.mark.asyncio async def test_conversation_id_on_all_spans( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -3257,7 +3332,7 @@ async def test_conversation_id_on_all_spans( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, @@ -3268,24 +3343,28 @@ async def test_conversation_id_on_all_spans( assert result is not None - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify workflow span (transaction) has conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] == "conv_test_123" ) # Verify invoke_agent span has conversation_id - assert invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + assert invoke_agent_span["attributes"]["gen_ai.conversation.id"] == 
"conv_test_123" # Verify ai_client span has conversation_id - assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" @pytest.mark.skipif( @@ -3294,7 +3373,7 @@ async def test_conversation_id_on_all_spans( ) @pytest.mark.asyncio async def test_conversation_id_on_tool_span( - sentry_init, capture_events, test_agent, get_model_response + sentry_init, capture_items, test_agent, get_model_response ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -3391,7 +3470,7 @@ def simple_tool(message: str) -> str: traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") await agents.Runner.run( agent_with_tool, @@ -3400,21 +3479,20 @@ def simple_tool(message: str) -> str: conversation_id="conv_tool_test_456", ) - (transaction,) = events - spans = transaction["spans"] - + spans = [item.payload for item in items if item.type == "span"] # Find the tool span tool_span = None for span in spans: - if span.get("description", "").startswith("execute_tool"): + if span.get("name", "").startswith("execute_tool"): tool_span = span break assert tool_span is not None # Tool span should have the conversation_id passed to Runner.run() - assert tool_span["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" # Workflow span (transaction) should have the same conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") assert ( transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" @@ -3428,7 +3506,7 @@ def simple_tool(message: str) -> str: @pytest.mark.asyncio async def test_no_conversation_id_when_not_provided( sentry_init, - capture_events, + capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, @@ -3455,7 
+3533,7 @@ async def test_no_conversation_id_when_not_provided( traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") # Don't pass conversation_id result = await agents.Runner.run( @@ -3464,16 +3542,23 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = events - spans = transaction["spans"] + transactions = [item.payload for item in items if item.type == "transaction"] + assert len(transactions) == 1 + transaction = transactions[0] + + spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) # Verify conversation_id is NOT set on any spans assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "data", {} + "attributes", {} ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) From 41e409d73164807c557a0ee7563bdd1655f56d83 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:46:26 +0200 Subject: [PATCH 21/84] fix openai-agents tests --- tests/integrations/openai_agents/test_openai_agents.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1c4925915d..294812b0ca 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -525,7 +525,7 @@ async def test_client_span_custom_model( traces_sample_rate=1.0, ) - items = capture_items("transaction", "spans") + items = capture_items("span") result = await agents.Runner.run( agent, "Test input", run_config=test_run_config @@ -1728,7 +1728,7 @@ async def test_error_handling(sentry_init, capture_items, test_agent): traces_sample_rate=1.0, ) - items = capture_items("error", "span", "transaction") + items = capture_items("event", "span", "transaction") with pytest.raises(Exception, match="Model Error"): await agents.Runner.run( @@ -1793,7 +1793,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) send_default_pii=True, ) - items = capture_items("error", "span") + items = capture_items("event", "span") with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) @@ -1851,7 +1851,7 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): assert spans[0]["status"] == "error" (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["status"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio @@ -2102,7 +2102,7 @@ async def test_mcp_tool_execution_with_error( assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["attributes"]["gen_ai.tool.output"] is None + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" # Verify error status was set assert mcp_tool_span["status"] == "error" From 8bf77f0ed1b351923f1c6fa5956437a952f75c9d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:51:40 +0200 Subject: [PATCH 22/84] fix 
common tests --- tests/tracing/test_decorator.py | 51 ++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index e73323138a..bbb7e85b1a 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -170,7 +170,6 @@ def my_agent(): item.payload for item in items if item.type == "span" ) - assert agent_span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" assert ( agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" @@ -190,7 +189,6 @@ def my_agent(): "thread.name": mock.ANY, } - assert tool_span["attributes"]["sentry.op"] == "gen_ai.execute_tool" assert ( tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" @@ -214,7 +212,6 @@ def my_agent(): } assert "gen_ai.tool.description" not in tool_span["attributes"] - assert chat_span["attributes"]["sentry.op"] == "gen_ai.chat" assert chat_span["name"] == "chat my-gpt-4o-mini" assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", @@ -243,9 +240,9 @@ def my_agent(): } -def test_span_templates_ai_objects(sentry_init, capture_events): +def test_span_templates_ai_objects(sentry_init, capture_items): sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): @@ -292,40 +289,54 @@ def my_agent(): with sentry_sdk.start_transaction(name="test-transaction"): my_agent() - (event,) = events - (agent_span, tool_span, chat_span) = event["spans"] + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) - assert agent_span["op"] == "gen_ai.invoke_agent" assert ( - agent_span["description"] + agent_span["name"] == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" ) - assert agent_span["data"] == { + assert agent_span["attributes"] == { 
"gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert tool_span["op"] == "gen_ai.execute_tool" assert ( - tool_span["description"] + tool_span["name"] == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" ) - assert tool_span["data"] == { + assert tool_span["attributes"] == { "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", "gen_ai.tool.description": "This is a tool function.", "gen_ai.operation.name": "execute_tool", "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 20, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert chat_span["op"] == "gen_ai.chat" - assert chat_span["description"] == "chat my-gpt-4o-mini" - assert chat_span["data"] == { + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 100, @@ -339,6 +350,14 @@ def my_agent(): "gen_ai.usage.input_tokens": 11, "gen_ai.usage.output_tokens": 22, "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + 
"sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", "thread.id": mock.ANY, "thread.name": mock.ANY, } From 7c3da4fdab771be2ae50dc741156951230d88c83 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:52:39 +0200 Subject: [PATCH 23/84] client handle None --- sentry_sdk/client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 99e58ec499..356b68e254 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -149,6 +149,12 @@ def _serialized_v1_attribute_to_serialized_v2_attribute( "type": "string", } + if attribute_value is None: + return { + "value": "None", + "type": "string", + } + return None From 06c2a40a6dd723e0a1ed0e6ee7166efe4068e179 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 13:54:05 +0200 Subject: [PATCH 24/84] fix item_count --- sentry_sdk/client.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 356b68e254..0d13b6db03 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1126,23 +1126,21 @@ def capture_event( event_opt["spans"] = non_gen_ai_spans envelope.add_transaction(event_opt) + converted_gen_ai_spans = [ + _serialized_v1_span_to_serialized_v2_span(span, event) + for span in gen_ai_spans + if isinstance(span, dict) + ] + envelope.add_item( Item( type=SpanBatcher.TYPE, content_type=SpanBatcher.CONTENT_TYPE, headers={ - "item_count": len(gen_ai_spans), + "item_count": len(converted_gen_ai_spans), }, payload=PayloadRef( - json={ - "items": [ - _serialized_v1_span_to_serialized_v2_span( - span, event - ) - for span in gen_ai_spans - if isinstance(span, dict) - ] - }, + json={"items": converted_gen_ai_spans}, ), ) ) From 204b9809f6efa06aad3b9f1914d169d1c677e286 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:02:48 +0200 Subject: [PATCH 25/84] fix common tests --- 
tests/tracing/test_decorator.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index bbb7e85b1a..5f5adec2cb 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -364,9 +364,9 @@ def my_agent(): @pytest.mark.parametrize("send_default_pii", [True, False]) -def test_span_templates_ai_pii(sentry_init, capture_events, send_default_pii): +def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) - events = capture_events() + items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2, **kwargs): @@ -396,15 +396,14 @@ def my_agent(*args, **kwargs): with sentry_sdk.start_transaction(name="test-transaction"): my_agent(22, 33, arg1=44, arg2=55) - (event,) = events - (_, tool_span, _) = event["spans"] + (_, tool_span, _) = (item.payload for item in items if item.type == "span") if send_default_pii: assert ( - tool_span["data"]["gen_ai.tool.input"] + tool_span["attributes"]["gen_ai.tool.input"] == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" ) - assert tool_span["data"]["gen_ai.tool.output"] == "'tool_output'" + assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" else: - assert "gen_ai.tool.input" not in tool_span["data"] - assert "gen_ai.tool.output" not in tool_span["data"] + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] From 00733f960e239bb4a4c606580bc0e9a05f97ec42 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:08:15 +0200 Subject: [PATCH 26/84] fix common tests --- tests/tracing/test_misc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index f69e19791a..bb8d942335 100644 --- 
a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -625,15 +625,15 @@ def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( span.set_data("gen_ai.operation.name", "chat") spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["data"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-op-name-test" def test_conversation_id_propagates_to_span_with_ai_op( - self, sentry_init, capture_events + self, sentry_init, capture_items ): """Span with ai.* op should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-ai-op-test") @@ -642,8 +642,8 @@ def test_conversation_id_propagates_to_span_with_ai_op( with start_span(op="ai.chat.completions"): pass - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( From a54cab4ce7b94624f5de991a1615e632da71f5f9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:16:23 +0200 Subject: [PATCH 27/84] common tests --- tests/tracing/test_misc.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index bb8d942335..8895c98dbc 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -611,11 +611,11 @@ class TestConversationIdPropagation: """Tests for conversation_id propagation to AI spans.""" def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( - self, sentry_init, capture_items + self, sentry_init, capture_events ): """Span with gen_ai.operation.name data should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - 
items = capture_items("span") + events = capture_events() scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-op-name-test") @@ -624,16 +624,16 @@ def test_conversation_id_propagates_to_span_with_gen_ai_operation_name( with start_span(op="http.client") as span: span.set_data("gen_ai.operation.name", "chat") - spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["attributes"] + (event,) = events + span_data = event["spans"][0]["data"] assert span_data.get("gen_ai.conversation.id") == "conv-op-name-test" def test_conversation_id_propagates_to_span_with_ai_op( - self, sentry_init, capture_items + self, sentry_init, capture_events ): """Span with ai.* op should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - items = capture_items("span") + events = capture_events() scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-ai-op-test") @@ -642,8 +642,8 @@ def test_conversation_id_propagates_to_span_with_ai_op( with start_span(op="ai.chat.completions"): pass - spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["attributes"] + (event,) = events + span_data = event["spans"][0]["data"] assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( From 4b0c47b28f8a4bf62de2e3a0a9d888ba908fe1b8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:24:18 +0200 Subject: [PATCH 28/84] tests --- tests/tracing/test_misc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 8895c98dbc..0188b08a88 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -647,11 +647,11 @@ def test_conversation_id_propagates_to_span_with_ai_op( assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" def test_conversation_id_propagates_to_span_with_gen_ai_op( - self, sentry_init, 
capture_events + self, sentry_init, capture_items ): """Span with gen_ai.* op should get conversation_id.""" sentry_init(traces_sample_rate=1.0) - events = capture_events() + items = capture_items("span") scope = sentry_sdk.get_current_scope() scope.set_conversation_id("conv-gen-ai-op-test") @@ -660,8 +660,8 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( with start_span(op="gen_ai.invoke_agent"): pass - (event,) = events - span_data = event["spans"][0]["data"] + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-gen-ai-op-test" def test_conversation_id_not_propagated_to_non_ai_span( From 6c5c812faa8879523fb4f90c650327a7f70a1d81 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:46:25 +0200 Subject: [PATCH 29/84] add experimental v2 option --- .../integrations/anthropic/test_anthropic.py | 53 ++++++++++++ .../google_genai/test_google_genai.py | 37 ++++++++ .../huggingface_hub/test_huggingface_hub.py | 8 ++ .../integrations/langchain/test_langchain.py | 26 ++++++ tests/integrations/litellm/test_litellm.py | 28 ++++++ tests/integrations/openai/test_openai.py | 42 +++++++++ .../openai_agents/test_openai_agents.py | 32 +++++++ .../pydantic_ai/test_pydantic_ai.py | 85 +++++++++++++++++++ 8 files changed, 311 insertions(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index c7fc280b6c..aedab1578b 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,6 +97,7 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -171,6 +172,7 @@ async def 
test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -287,6 +289,7 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -395,6 +398,7 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -498,6 +502,7 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -614,6 +619,7 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -723,6 +729,7 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -831,6 +838,7 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -953,6 +961,7 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1064,6 +1073,7 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1170,6 +1180,7 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1290,6 +1301,7 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1400,6 +1412,7 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1510,6 +1523,7 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1666,6 +1680,7 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1815,6 +1830,7 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1972,6 +1988,7 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2129,6 +2146,7 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2188,6 +2206,7 @@ async def test_stream_message_with_input_json_delta_async( def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = Anthropic(api_key="z") @@ -2210,6 +2229,7 @@ def test_exception_message_create(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2236,6 +2256,7 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2262,6 +2283,7 @@ async def test_span_status_error_async(sentry_init, capture_items): @pytest.mark.asyncio async def test_exception_message_create_async(sentry_init, capture_items): 
sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") @@ -2286,6 +2308,7 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2316,6 +2339,7 @@ async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2379,6 +2403,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2429,6 +2454,7 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2475,6 +2501,7 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2525,6 +2552,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2585,6 +2613,7 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2671,6 +2700,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -2800,6 +2830,7 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2930,6 +2961,7 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3062,6 +3094,7 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3194,6 +3227,7 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3269,6 +3303,7 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
client = Anthropic(api_key="z") @@ -3522,6 +3557,7 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3572,6 +3608,7 @@ def test_message_with_url_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3615,6 +3652,7 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3659,6 +3697,7 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3703,6 +3742,7 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3746,6 +3786,7 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3790,6 +3831,7 @@ def test_message_with_mixed_content(sentry_init, 
capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3872,6 +3914,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3946,6 +3989,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3984,6 +4028,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4019,6 +4064,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4067,6 +4113,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = 
capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4115,6 +4162,7 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4192,6 +4240,7 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4258,6 +4307,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4291,6 +4341,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4359,6 +4410,7 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4419,6 +4471,7 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index e074b79c8c..ae31fe565b 100644 
--- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,6 +130,7 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -219,6 +220,7 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -262,6 +264,7 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -344,6 +347,7 @@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -380,6 +384,7 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -411,6 +416,7 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -527,6 +533,7 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -554,6 +561,7 @@ 
def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -595,6 +603,7 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -659,6 +668,7 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -698,6 +708,7 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -729,6 +740,7 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,6 +783,7 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -793,6 +806,7 @@ def test_contents_as_none(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -819,6 +833,7 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") 
@@ -905,6 +920,7 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -980,6 +996,7 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1041,6 +1058,7 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1087,6 +1105,7 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1120,6 +1139,7 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1159,6 +1179,7 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1199,6 +1220,7 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1263,6 +1285,7 @@ async def test_async_embed_content_string_input( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1312,6 +1335,7 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1346,6 +1370,7 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1388,6 +1413,7 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1419,6 +1445,7 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1455,6 +1482,7 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1487,6 +1515,7 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1536,6 +1565,7 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1581,6 +1611,7 @@ def 
test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1635,6 +1666,7 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1678,6 +1710,7 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1716,6 +1749,7 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1752,6 +1786,7 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1796,6 +1831,7 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1839,6 +1875,7 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 98abbb00fa..16c27b678d 100644 --- 
a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,6 +480,7 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -555,6 +556,7 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +633,7 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +712,7 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -780,6 +784,7 @@ def test_chat_completion_api_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -839,6 +844,7 @@ def test_span_status_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -881,6 +887,7 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, 
integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -976,6 +983,7 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index f709d12129..5002d050b9 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,6 +108,7 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -216,6 +217,7 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -336,6 +338,7 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -528,6 +531,7 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -865,6 +869,7 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -903,6 +908,7 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -988,6 +994,7 @@ def _identifying_params(self): return {} sentry_init(integrations=[LangchainIntegration()]) + _experiments = ({"gen_ai_as_v2_spans": True},) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -1028,6 +1035,7 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1060,6 +1068,7 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1092,6 +1101,7 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1124,6 +1134,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1161,6 +1172,7 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1298,6 +1310,7 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", 
"span") @@ -1390,6 +1403,7 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1468,6 +1482,7 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1542,6 +1557,7 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1614,6 +1630,7 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1675,6 +1692,7 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1724,6 +1742,7 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): # Initialize without LangchainIntegration sentry_init(traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -1760,6 +1779,7 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1817,6 +1837,7 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1857,6 +1878,7 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1920,6 +1942,7 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1973,6 +1996,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2037,6 +2061,7 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2342,6 +2367,7 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 90807744e7..b9365e7008 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,6 +152,7 @@ def 
test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -233,6 +234,7 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -316,6 +318,7 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -386,6 +389,7 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -452,6 +456,7 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -521,6 +526,7 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -585,6 +591,7 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -647,6 +654,7 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +717,7 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -765,6 +774,7 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -815,6 +825,7 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -853,6 +864,7 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -894,6 +906,7 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -941,6 +954,7 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1036,6 +1050,7 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1132,6 +1147,7 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1191,6 +1207,7 @@ async def 
test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1250,6 +1267,7 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1296,6 +1314,7 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1338,6 +1357,7 @@ def test_response_without_usage(sentry_init, capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1379,6 +1399,7 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1393,6 +1414,7 @@ def test_litellm_message_truncation(sentry_init, capture_items): integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1459,6 +1481,7 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1538,6 +1561,7 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
@@ -1618,6 +1642,7 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1686,6 +1711,7 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1755,6 +1781,7 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1828,6 +1855,7 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index e53f8e4f55..4c7df84b8b 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -138,6 +138,7 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -233,6 +234,7 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -312,6 +314,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( 
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -407,6 +410,7 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -502,6 +506,7 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -621,6 +626,7 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -701,6 +707,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -764,6 +771,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -829,6 +837,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -957,6 +966,7 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1109,6 +1119,7 @@ async def 
test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1280,6 +1291,7 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1412,6 +1424,6 @@ async def test_streaming_chat_completion_async( def test_bad_chat_completion(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event") client = OpenAI(api_key="z") @@ -1430,6 +1442,6 @@ def test_bad_chat_completion(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -1455,6 +1467,6 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_bad_chat_completion_async(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1485,6 +1497,7 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1567,6 +1580,7 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1638,6 +1652,7 @@ async def test_embeddings_create_async_no_pii(
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1721,6 +1739,7 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1789,6 +1808,7 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1817,6 +1837,7 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1837,6 +1858,7 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1860,6 +1882,7 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1882,6 +1905,7 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1945,6 +1969,7 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) 
items = capture_items("transaction", "span") @@ -2011,6 +2036,7 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2042,6 +2068,7 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2435,6 +2462,7 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2557,6 +2585,7 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2767,6 +2796,7 @@ def test_error_in_responses_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2873,6 +2903,7 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3158,6 +3189,7 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3383,6 +3415,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3510,6 +3543,7 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3586,6 +3620,7 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3649,6 +3684,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3691,6 +3727,7 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3721,6 +3758,7 @@ def test_openai_message_truncation(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3770,6 +3808,7 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3848,6 +3887,7 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3924,6 +3964,7 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3973,6 +4014,7 @@ async def 
test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 294812b0ca..9e74848a04 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,6 +182,7 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -339,6 +340,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -523,6 +525,7 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -570,6 +573,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -721,6 +725,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -963,6 +968,7 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1092,6 +1098,7 @@ async def test_max_turns_before_handoff_span( 
sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1190,6 +1197,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1418,6 +1426,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1580,6 +1589,7 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1678,6 +1688,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1726,6 +1737,7 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1791,6 +1803,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1835,6 +1848,7 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("event", "transaction", "span") @@ -1948,6 +1962,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2078,6 +2093,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2202,6 +2218,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2259,6 +2276,7 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2302,6 +2320,7 @@ def test_openai_agents_message_role_mapping( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2401,6 +2420,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2498,6 +2518,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2591,6 +2612,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, 
) items = capture_items("span", "transaction") @@ -2679,6 +2701,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2801,6 +2824,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2889,6 +2913,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3023,6 +3048,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3065,6 +3091,7 @@ def test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3111,6 +3138,7 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3176,6 +3204,7 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3330,6 +3359,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3468,6 +3498,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3531,6 +3562,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index fe34dd0f5d..bab2f6208d 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,6 +61,7 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -102,6 +103,7 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -135,6 +137,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -179,6 +182,7 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -211,6 +215,7 @@ def test_agent_run_sync_model_error(sentry_init, 
capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -244,6 +249,7 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -288,6 +294,7 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -322,6 +329,7 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -387,6 +395,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -470,6 +479,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -534,6 +544,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -583,6 +594,7 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +643,7 @@ async def test_system_prompt_attribute( 
integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -676,6 +689,7 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -700,6 +714,7 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,6 +744,7 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -765,6 +781,7 @@ async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -799,6 +816,7 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -848,6 +866,7 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -878,6 +897,7 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, 
send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -907,6 +927,7 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -938,6 +959,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -975,6 +997,7 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1066,6 +1089,7 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1135,6 +1159,7 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1158,6 +1183,7 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1182,6 +1208,7 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1208,6 +1235,7 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1242,6 +1270,7 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1297,6 +1326,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1347,6 +1377,7 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1386,6 +1417,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1412,6 +1444,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1440,6 +1473,7 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1475,6 +1509,7 @@ async 
def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1508,6 +1543,7 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1530,6 +1566,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1564,6 +1601,7 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1609,6 +1647,7 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1657,6 +1696,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1684,6 +1724,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1709,6 +1750,7 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1734,6 +1776,7 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1764,6 +1807,7 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1793,6 +1837,7 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1822,6 +1867,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1848,6 +1894,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1889,6 +1936,7 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1918,6 +1966,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1954,6 +2003,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1993,6 +2043,7 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2025,6 +2076,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2060,6 +2112,7 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2094,6 +2147,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2122,6 +2176,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": 
True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2149,6 +2204,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2172,6 +2228,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2191,6 +2248,7 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2212,6 +2270,7 @@ async def test_set_model_data_with_system(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2243,6 +2302,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2276,6 +2336,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2308,6 +2369,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) # 
Should return False @@ -2326,6 +2388,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2352,6 +2415,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2384,6 +2448,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2414,6 +2479,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2444,6 +2510,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2483,6 +2550,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2509,6 +2577,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2536,6 +2605,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2560,6 +2630,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2583,6 +2654,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2607,6 +2679,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2630,6 +2703,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2661,6 +2735,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2694,6 +2769,7 @@ async def 
test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2722,6 +2798,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2749,6 +2826,7 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2797,6 +2875,7 @@ async def test_binary_content_encoding_image(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2827,6 +2906,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2870,6 +2950,7 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2894,6 +2975,7 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" 
sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) + _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") @@ -2964,6 +3046,7 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3034,6 +3117,7 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3081,6 +3165,7 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 51a07fff893c5c552de1950239b4a064dc48b828 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:47:07 +0200 Subject: [PATCH 30/84] push experiment --- sentry_sdk/consts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 73e5a6d9cb..82107b49ee 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -86,6 +86,7 @@ class CompressionAlgo(Enum): "trace_lifecycle": Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], "suppress_asgi_chained_exceptions": Optional[bool], + "gen_ai_as_v2_spans": Optional[bool], }, total=False, ) From bab75670df741b84c3b17b8b615786705abdbabc Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:52:13 +0200 Subject: [PATCH 31/84] fix tests --- tests/tracing/test_decorator.py | 16 +++++++++++++--- tests/tracing/test_misc.py | 5 ++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 5f5adec2cb..d370b4bbc9 100644 --- 
a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -122,7 +122,10 @@ async def _some_function_traced(a, b, c): def test_span_templates_ai_dicts(sentry_init, capture_items): - sentry_init(traces_sample_rate=1.0) + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) @@ -241,7 +244,10 @@ def my_agent(): def test_span_templates_ai_objects(sentry_init, capture_items): - sentry_init(traces_sample_rate=1.0) + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) @@ -365,7 +371,11 @@ def my_agent(): @pytest.mark.parametrize("send_default_pii", [True, False]) def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 0188b08a88..4209a02b4b 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -650,7 +650,10 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( self, sentry_init, capture_items ): """Span with gen_ai.* op should get conversation_id.""" - sentry_init(traces_sample_rate=1.0) + sentry_init( + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("span") scope = sentry_sdk.get_current_scope() From 3e5579506264719625225e62271ab612c57afdc8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:53:49 +0200 Subject: [PATCH 32/84] client changes --- sentry_sdk/client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/sentry_sdk/client.py b/sentry_sdk/client.py index 0d13b6db03..f8bc071545 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1113,10 +1113,14 @@ def capture_event( envelope = Envelope(headers=headers) - if is_transaction: + if is_transaction and not self.options["_experiments"].get( + "gen_ai_as_v2_spans", False + ): if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) + envelope.add_transaction(event_opt) + elif is_transaction: split_spans = _split_gen_ai_spans(event_opt) if split_spans is None or not split_spans[1]: envelope.add_transaction(event_opt) From 6d1d7edce94a5c20be9d32470ca1a385c0d199be Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 14:55:22 +0200 Subject: [PATCH 33/84] simplify client logic --- sentry_sdk/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index f8bc071545..87504c94b1 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1113,12 +1113,12 @@ def capture_event( envelope = Envelope(headers=headers) + if is_transaction and isinstance(profile, Profile): + envelope.add_profile(profile.to_json(event_opt, self.options)) + if is_transaction and not self.options["_experiments"].get( "gen_ai_as_v2_spans", False ): - if isinstance(profile, Profile): - envelope.add_profile(profile.to_json(event_opt, self.options)) - envelope.add_transaction(event_opt) elif is_transaction: split_spans = _split_gen_ai_spans(event_opt) From 6bf400680527c779dc13421df181daab2fb09e7e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 15:07:42 +0200 Subject: [PATCH 34/84] Revert "add experimental v2 option" This reverts commit 6c5c812faa8879523fb4f90c650327a7f70a1d81. 
--- .../integrations/anthropic/test_anthropic.py | 53 ------------ .../google_genai/test_google_genai.py | 37 -------- .../huggingface_hub/test_huggingface_hub.py | 8 -- .../integrations/langchain/test_langchain.py | 26 ------ tests/integrations/litellm/test_litellm.py | 28 ------ tests/integrations/openai/test_openai.py | 42 --------- .../openai_agents/test_openai_agents.py | 32 ------- .../pydantic_ai/test_pydantic_ai.py | 85 ------------------- 8 files changed, 311 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index aedab1578b..c7fc280b6c 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,7 +97,6 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -172,7 +171,6 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -289,7 +287,6 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -398,7 +395,6 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -502,7 +498,6 @@ def test_streaming_create_message_api_error( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -619,7 +614,6 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,7 +723,6 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -838,7 +831,6 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -961,7 +953,6 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1073,7 +1064,6 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1180,7 +1170,6 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1301,7 +1290,6 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) 
items = capture_items("transaction", "span") @@ -1412,7 +1400,6 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1523,7 +1510,6 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1680,7 +1666,6 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1830,7 +1815,6 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1988,7 +1972,6 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2146,7 +2129,6 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2206,7 +2188,6 @@ async def test_stream_message_with_input_json_delta_async( def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], 
traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = Anthropic(api_key="z") @@ -2229,7 +2210,6 @@ def test_exception_message_create(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2256,7 +2236,6 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2283,7 +2262,6 @@ async def test_span_status_error_async(sentry_init, capture_items): @pytest.mark.asyncio async def test_exception_message_create_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") @@ -2308,7 +2286,6 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2339,7 +2316,6 @@ async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2403,7 +2379,6 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with 
start_transaction(name="test"): @@ -2454,7 +2429,6 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2501,7 +2475,6 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2552,7 +2525,6 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2613,7 +2585,6 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2700,7 +2671,6 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -2830,7 +2800,6 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2961,7 +2930,6 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3094,7 +3062,6 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3227,7 +3194,6 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3303,7 +3269,6 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3557,7 +3522,6 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3608,7 +3572,6 @@ def test_message_with_url_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3652,7 +3615,6 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
client = Anthropic(api_key="z") @@ -3697,7 +3659,6 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3742,7 +3703,6 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3786,7 +3746,6 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3831,7 +3790,6 @@ def test_message_with_mixed_content(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3914,7 +3872,6 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3989,7 +3946,6 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4028,7 +3984,6 @@ def 
test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4064,7 +4019,6 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4113,7 +4067,6 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4162,7 +4115,6 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4240,7 +4192,6 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4307,7 +4258,6 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = 
capture_items("transaction", "span") with mock.patch.object( @@ -4341,7 +4291,6 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4410,7 +4359,6 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -4471,7 +4419,6 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ae31fe565b..e074b79c8c 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,7 +130,6 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -220,7 +219,6 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -264,7 +262,6 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -347,7 +344,6 
@@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -384,7 +380,6 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -416,7 +411,6 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -533,7 +527,6 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -561,7 +554,6 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -603,7 +595,6 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -668,7 +659,6 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -708,7 +698,6 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -740,7 +729,6 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -783,7 +771,6 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -806,7 +793,6 @@ def test_contents_as_none(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -833,7 +819,6 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -920,7 +905,6 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -996,7 +980,6 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1058,7 +1041,6 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1105,7 +1087,6 @@ def test_embed_content_error_handling(sentry_init, 
capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1139,7 +1120,6 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1179,7 +1159,6 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1220,7 +1199,6 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1285,7 +1263,6 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1335,7 +1312,6 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1370,7 +1346,6 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1413,7 +1388,6 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1445,7 +1419,6 @@ def 
test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1482,7 +1455,6 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1515,7 +1487,6 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1565,7 +1536,6 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1611,7 +1581,6 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1666,7 +1635,6 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1710,7 +1678,6 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1749,7 +1716,6 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1786,7 +1752,6 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1831,7 +1796,6 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1875,7 +1839,6 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 16c27b678d..98abbb00fa 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,7 +480,6 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -556,7 +555,6 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -633,7 +631,6 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -712,7 +709,6 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -784,7 +780,6 @@ def test_chat_completion_api_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -844,7 +839,6 @@ def test_span_status_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: sentry_init(traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -887,7 +881,6 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -983,7 +976,6 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 5002d050b9..f709d12129 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,7 +108,6 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -217,7 +216,6 
@@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -338,7 +336,6 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -531,7 +528,6 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -869,7 +865,6 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -908,7 +903,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -994,7 +988,6 @@ def _identifying_params(self): return {} sentry_init(integrations=[LangchainIntegration()]) - _experiments = ({"gen_ai_as_v2_spans": True},) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -1035,7 +1028,6 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1068,7 +1060,6 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1101,7 
+1092,6 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1134,7 +1124,6 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1172,7 +1161,6 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1310,7 +1298,6 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1403,7 +1390,6 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1482,7 +1468,6 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1557,7 +1542,6 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1630,7 +1614,6 @@ async def 
test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1692,7 +1675,6 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1742,7 +1724,6 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): # Initialize without LangchainIntegration sentry_init(traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") with mock.patch.object( @@ -1779,7 +1760,6 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1837,7 +1817,6 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1878,7 +1857,6 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1942,7 +1920,6 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1996,7 +1973,6 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2061,7 +2037,6 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2367,7 +2342,6 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b9365e7008..90807744e7 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,7 +152,6 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -234,7 +233,6 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -318,7 +316,6 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -389,7 +386,6 @@ async def test_async_streaming_chat_completion( 
integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -456,7 +452,6 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -526,7 +521,6 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -591,7 +585,6 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -654,7 +647,6 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -717,7 +709,6 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -774,7 +765,6 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -825,7 +815,6 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -864,7 
+853,6 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -906,7 +894,6 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -954,7 +941,6 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1050,7 +1036,6 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1147,7 +1132,6 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1207,7 +1191,6 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1267,7 +1250,6 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1314,7 +1296,6 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1357,7 +1338,6 @@ def test_response_without_usage(sentry_init, capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1399,7 +1379,6 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1414,7 +1393,6 @@ def test_litellm_message_truncation(sentry_init, capture_items): integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1481,7 +1459,6 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1561,7 +1538,6 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1642,7 +1618,6 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1711,7 +1686,6 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1781,7 +1755,6 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1855,7 +1828,6 @@ async def 
test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4c7df84b8b..e53f8e4f55 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -138,7 +138,6 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -234,7 +233,6 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -314,7 +312,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -410,7 +407,6 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -506,7 +502,6 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -626,7 +621,6 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items 
= capture_items("span") @@ -707,7 +701,6 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,7 +764,6 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -837,7 +829,6 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -966,7 +957,6 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1119,7 +1109,6 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1291,7 +1280,6 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1424,7 +1412,6 @@ async def test_streaming_chat_completion_async( def test_bad_chat_completion(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event") client = OpenAI(api_key="z") @@ -1443,7 +1430,6 @@ def test_bad_chat_completion(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - _experiments = 
({"gen_ai_as_v2_spans": True},) items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -1469,7 +1455,6 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_bad_chat_completion_async(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1500,7 +1485,6 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1583,7 +1567,6 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1655,7 +1638,6 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1739,7 +1721,6 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1808,7 +1789,6 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1837,7 +1817,6 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1858,7 +1837,6 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1882,7 +1860,6 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1905,7 +1882,6 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1969,7 +1945,6 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2036,7 +2011,6 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2068,7 +2042,6 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2462,7 +2435,6 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2585,7 +2557,6 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2796,7 +2767,6 @@ def test_error_in_responses_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2903,7 +2873,6 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3189,7 +3158,6 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3415,7 +3383,6 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3543,7 +3510,6 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3620,7 +3586,6 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3684,7 +3649,6 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3727,7 +3691,6 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3758,7 +3721,6 @@ def test_openai_message_truncation(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3808,7 +3770,6 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3887,7 +3848,6 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3964,7 +3924,6 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -4014,7 +3973,6 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9e74848a04..294812b0ca 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,7 +182,6 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -340,7 +339,6 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, 
) items = capture_items("span", "transaction") @@ -525,7 +523,6 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -573,7 +570,6 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -725,7 +721,6 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -968,7 +963,6 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1098,7 +1092,6 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1197,7 +1190,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1426,7 +1418,6 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1589,7 +1580,6 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": 
True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1688,7 +1678,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1737,7 +1726,6 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1803,7 +1791,6 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1848,7 +1835,6 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1962,7 +1948,6 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2093,7 +2078,6 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2218,7 +2202,6 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2276,7 +2259,6 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2320,7 +2302,6 @@ def test_openai_agents_message_role_mapping( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2420,7 +2401,6 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2518,7 +2498,6 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2612,7 +2591,6 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2701,7 +2679,6 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2824,7 +2801,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2913,7 +2889,6 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3048,7 +3023,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3091,7 +3065,6 @@ def test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3138,7 +3111,6 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3204,7 +3176,6 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3359,7 +3330,6 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3498,7 +3468,6 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3562,7 +3531,6 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index bab2f6208d..fe34dd0f5d 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,7 +61,6 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -103,7 +102,6 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -137,7 +135,6 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -182,7 +179,6 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -215,7 +211,6 @@ def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -249,7 +244,6 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -294,7 +288,6 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -329,7 +322,6 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, 
- _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -395,7 +387,6 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -479,7 +470,6 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -544,7 +534,6 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -594,7 +583,6 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -643,7 +631,6 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -689,7 +676,6 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -714,7 +700,6 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -744,7 +729,6 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - 
_experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -781,7 +765,6 @@ async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -816,7 +799,6 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -866,7 +848,6 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -897,7 +878,6 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -927,7 +907,6 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -959,7 +938,6 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -997,7 +975,6 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII 
disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1089,7 +1066,6 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1159,7 +1135,6 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1183,7 +1158,6 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1208,7 +1182,6 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1235,7 +1208,6 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1270,7 +1242,6 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1326,7 +1297,6 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1377,7 +1347,6 @@ async def test_invoke_agent_with_instructions( 
integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1417,7 +1386,6 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1444,7 +1412,6 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1473,7 +1440,6 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1509,7 +1475,6 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1543,7 +1508,6 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1566,7 +1530,6 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1601,7 +1564,6 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1647,7 +1609,6 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1696,7 +1657,6 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1724,7 +1684,6 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1750,7 +1709,6 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1776,7 +1734,6 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1807,7 +1764,6 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1837,7 +1793,6 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1867,7 +1822,6 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1894,7 +1848,6 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1936,7 +1889,6 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1966,7 +1918,6 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2003,7 +1954,6 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2043,7 +1993,6 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2076,7 +2025,6 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2112,7 +2060,6 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2147,7 +2094,6 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2176,7 +2122,6 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2204,7 +2149,6 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2228,7 +2172,6 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2248,7 +2191,6 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2270,7 +2212,6 @@ async def test_set_model_data_with_system(sentry_init, 
capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2302,7 +2243,6 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2336,7 +2276,6 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2369,7 +2308,6 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2388,7 +2326,6 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2415,7 +2352,6 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2448,7 +2384,6 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2479,7 +2414,6 @@ 
async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2510,7 +2444,6 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2550,7 +2483,6 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2577,7 +2509,6 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2605,7 +2536,6 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2630,7 +2560,6 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2654,7 +2583,6 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2679,7 +2607,6 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2703,7 +2630,6 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2735,7 +2661,6 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2769,7 +2694,6 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2798,7 +2722,6 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2826,7 +2749,6 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2875,7 +2797,6 @@ async def test_binary_content_encoding_image(sentry_init, 
capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2906,7 +2827,6 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2950,7 +2870,6 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2975,7 +2894,6 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) - _experiments = ({"gen_ai_as_v2_spans": True},) items = capture_items("transaction", "span") @@ -3046,7 +2964,6 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3117,7 +3034,6 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3165,7 +3081,6 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 700e8a17934b20734797472a9270e054b8c1bb90 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb 
Date: Fri, 17 Apr 2026 15:09:05 +0200 Subject: [PATCH 35/84] retry adding experimental option to tests --- .../integrations/anthropic/test_anthropic.py | 113 ++++++++++++++++-- .../google_genai/test_google_genai.py | 37 ++++++ .../huggingface_hub/test_huggingface_hub.py | 10 +- .../integrations/langchain/test_langchain.py | 30 ++++- tests/integrations/litellm/test_litellm.py | 28 +++++ tests/integrations/openai/test_openai.py | 57 ++++++++- .../openai_agents/test_openai_agents.py | 32 +++++ .../pydantic_ai/test_pydantic_ai.py | 90 +++++++++++++- 8 files changed, 377 insertions(+), 20 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index c7fc280b6c..b19cca9347 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,6 +97,7 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -171,6 +172,7 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -287,6 +289,7 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -395,6 +398,7 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -498,6 +502,7 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -614,6 +619,7 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -723,6 +729,7 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -831,6 +838,7 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -953,6 +961,7 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1064,6 +1073,7 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1170,6 +1180,7 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1290,6 +1301,7 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1400,6 +1412,7 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1510,6 +1523,7 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1666,6 +1680,7 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1815,6 +1830,7 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1972,6 +1988,7 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2129,6 +2146,7 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2187,7 +2205,11 @@ async def test_stream_message_with_input_json_delta_async( def 
test_exception_message_create(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "transaction") client = Anthropic(api_key="z") @@ -2209,7 +2231,11 @@ def test_exception_message_create(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2235,7 +2261,11 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def test_span_status_error_async(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "span") with start_transaction(name="anthropic"): @@ -2261,7 +2291,11 @@ async def test_span_status_error_async(sentry_init, capture_items): @pytest.mark.asyncio async def test_exception_message_create_async(sentry_init, capture_items): - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") @@ -2286,6 +2320,7 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2316,6 +2351,7 @@ async def 
test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2379,6 +2415,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2429,6 +2466,7 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2475,6 +2513,7 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2525,6 +2564,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2585,6 +2625,7 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2671,6 +2712,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = 
AsyncAnthropic(api_key="z") @@ -2800,6 +2842,7 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2930,6 +2973,7 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3062,6 +3106,7 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3194,6 +3239,7 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3269,6 +3315,7 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3522,6 +3569,7 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3572,6 +3620,7 @@ def test_message_with_url_image(sentry_init, capture_items): 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3615,6 +3664,7 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3659,6 +3709,7 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3703,6 +3754,7 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3746,6 +3798,7 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3790,6 +3843,7 @@ def test_message_with_mixed_content(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3872,6 +3926,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3946,6 +4001,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3984,6 +4040,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4018,7 +4075,11 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4066,7 +4127,11 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item Usage(input_tokens=19, output_tokens=14, cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4114,7 +4179,11 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items Usage(input_tokens=19, output_tokens=14, 
cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4191,7 +4260,11 @@ def test_input_tokens_include_cache_read_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( @@ -4257,7 +4330,11 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( @@ -4290,7 +4367,11 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): Real Anthropic response (from E2E test, simple call without caching): Usage(input_tokens=20, output_tokens=12) """ - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4358,7 +4439,11 @@ def test_cache_tokens_streaming( ) ) - sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( @@ -4418,7 +4503,11 @@ def test_stream_messages_cache_tokens( ) ) - 
sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") with mock.patch.object( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index e074b79c8c..ae31fe565b 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,6 +130,7 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -219,6 +220,7 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -262,6 +264,7 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -344,6 +347,7 @@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -380,6 +384,7 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -411,6 +416,7 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -527,6 +533,7 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -554,6 +561,7 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -595,6 +603,7 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -659,6 +668,7 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -698,6 +708,7 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -729,6 +740,7 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,6 +783,7 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -793,6 +806,7 @@ def test_contents_as_none(sentry_init, 
capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -819,6 +833,7 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -905,6 +920,7 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -980,6 +996,7 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1041,6 +1058,7 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1087,6 +1105,7 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1120,6 +1139,7 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1159,6 +1179,7 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1199,6 +1220,7 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1263,6 +1285,7 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1312,6 +1335,7 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1346,6 +1370,7 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1388,6 +1413,7 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1419,6 +1445,7 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1455,6 +1482,7 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1487,6 +1515,7 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1536,6 +1565,7 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1581,6 +1611,7 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1635,6 +1666,7 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1678,6 +1710,7 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1716,6 +1749,7 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1752,6 +1786,7 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1796,6 +1831,7 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ 
-1839,6 +1875,7 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 98abbb00fa..eaac8c1ab1 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,6 +480,7 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -555,6 +556,7 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +633,7 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +712,7 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -779,7 +783,7 @@ def test_chat_completion_streaming( def test_chat_completion_api_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: - sentry_init(traces_sample_rate=1.0) + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = 
capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -838,7 +842,7 @@ def test_chat_completion_api_error( def test_span_status_error( sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" ) -> None: - sentry_init(traces_sample_rate=1.0) + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("event", "transaction", "span") client = get_hf_provider_inference_client() @@ -881,6 +885,7 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -976,6 +981,7 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index f709d12129..ef27d45767 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,6 +108,7 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -216,6 +217,7 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -336,6 +338,7 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -528,6 +531,7 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, 
send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -865,6 +869,7 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -903,6 +908,7 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -987,7 +993,9 @@ def _llm_type(self): def _identifying_params(self): return {} - sentry_init(integrations=[LangchainIntegration()]) + sentry_init( + integrations=[LangchainIntegration()], _experiments={"gen_ai_as_v2_spans": True} + ) # Create a manual SentryLangchainCallback manual_callback = SentryLangchainCallback( @@ -1028,6 +1036,7 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1060,6 +1069,7 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1092,6 +1102,7 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1124,6 +1135,7 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, 
) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1161,6 +1173,7 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1298,6 +1311,7 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1390,6 +1404,7 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1468,6 +1483,7 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1542,6 +1558,7 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1614,6 +1631,7 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1675,6 +1693,7 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], 
traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1723,7 +1742,7 @@ def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): pytest.skip("langchain_openai not installed") # Initialize without LangchainIntegration - sentry_init(traces_sample_rate=1.0) + sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) items = capture_items("transaction", "span") with mock.patch.object( @@ -1760,6 +1779,7 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1817,6 +1837,7 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1857,6 +1878,7 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1920,6 +1942,7 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1973,6 +1996,7 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2037,6 +2061,7 @@ 
def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2342,6 +2367,7 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 90807744e7..b9365e7008 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,6 +152,7 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -233,6 +234,7 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -316,6 +318,7 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -386,6 +389,7 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -452,6 +456,7 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -521,6 +526,7 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -585,6 +591,7 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -647,6 +654,7 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -709,6 +717,7 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -765,6 +774,7 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -815,6 +825,7 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -853,6 +864,7 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -894,6 +906,7 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -941,6 +954,7 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1036,6 +1050,7 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1132,6 +1147,7 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1191,6 +1207,7 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1250,6 +1267,7 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1296,6 +1314,7 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1338,6 +1357,7 @@ def test_response_without_usage(sentry_init, capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1379,6 +1399,7 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1393,6 +1414,7 @@ def test_litellm_message_truncation(sentry_init, capture_items): 
integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1459,6 +1481,7 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1538,6 +1561,7 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1618,6 +1642,7 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1686,6 +1711,7 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1755,6 +1781,7 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1828,6 +1855,7 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index e53f8e4f55..c4d77db5c8 100644 --- a/tests/integrations/openai/test_openai.py +++ 
b/tests/integrations/openai/test_openai.py @@ -138,6 +138,7 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -233,6 +234,7 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -312,6 +314,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -407,6 +410,7 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -502,6 +506,7 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -621,6 +626,7 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -701,6 +707,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -764,6 +771,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy 
integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -829,6 +837,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -957,6 +966,7 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1109,6 +1119,7 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1280,6 +1291,7 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1411,7 +1423,11 @@ async def test_streaming_chat_completion_async( def test_bad_chat_completion(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event") client = OpenAI(api_key="z") @@ -1429,7 +1445,11 @@ def test_bad_chat_completion(sentry_init, capture_items): def test_span_status_error(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -1454,7 +1474,11 @@ def test_span_status_error(sentry_init, capture_items): @pytest.mark.asyncio async def 
test_bad_chat_completion_async(sentry_init, capture_items): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1485,6 +1509,7 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1567,6 +1592,7 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1638,6 +1664,7 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1721,6 +1748,7 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1789,6 +1817,7 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1817,6 +1846,7 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1837,6 +1867,7 @@ def 
test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1860,6 +1891,7 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1882,6 +1914,7 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1945,6 +1978,7 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2011,6 +2045,7 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2042,6 +2077,7 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2435,6 +2471,7 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2557,6 +2594,7 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2767,6 +2805,7 
@@ def test_error_in_responses_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2873,6 +2912,7 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3158,6 +3198,7 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3383,6 +3424,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3510,6 +3552,7 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3586,6 +3629,7 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3649,6 +3693,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3691,6 +3736,7 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3721,6 +3767,7 @@ def 
test_openai_message_truncation(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3770,6 +3817,7 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3848,6 +3896,7 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3924,6 +3973,7 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3973,6 +4023,7 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 294812b0ca..9e74848a04 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,6 +182,7 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -339,6 +340,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -523,6 +525,7 @@ async def test_client_span_custom_model( sentry_init( 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -570,6 +573,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -721,6 +725,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -963,6 +968,7 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1092,6 +1098,7 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1190,6 +1197,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1418,6 +1426,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1580,6 +1589,7 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", + _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1678,6 +1688,7 @@ def 
simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1726,6 +1737,7 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1791,6 +1803,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1835,6 +1848,7 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1948,6 +1962,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2078,6 +2093,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2202,6 +2218,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2259,6 +2276,7 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ 
-2302,6 +2320,7 @@ def test_openai_agents_message_role_mapping( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2401,6 +2420,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2498,6 +2518,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2591,6 +2612,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2679,6 +2701,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2801,6 +2824,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2889,6 +2913,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3023,6 +3048,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", 
"transaction") @@ -3065,6 +3091,7 @@ def test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3111,6 +3138,7 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3176,6 +3204,7 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3330,6 +3359,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3468,6 +3498,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3531,6 +3562,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index fe34dd0f5d..9faccb0a84 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,6 +61,7 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, 
) items = capture_items("transaction", "span") @@ -102,6 +103,7 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -135,6 +137,7 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -179,6 +182,7 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -211,6 +215,7 @@ def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -244,6 +249,7 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -288,6 +294,7 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -322,6 +329,7 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -387,6 +395,7 @@ async def 
test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -470,6 +479,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -534,6 +544,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -583,6 +594,7 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -631,6 +643,7 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -676,6 +689,7 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -700,6 +714,7 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,6 +744,7 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -765,6 +781,7 @@ async def 
test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -799,6 +816,7 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -848,6 +866,7 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -878,6 +897,7 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -907,6 +927,7 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -938,6 +959,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -975,6 +997,7 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1066,6 
+1089,7 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1135,6 +1159,7 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1158,6 +1183,7 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1182,6 +1208,7 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1208,6 +1235,7 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1242,6 +1270,7 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1297,6 +1326,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1347,6 +1377,7 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1386,6 +1417,7 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1412,6 +1444,7 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1440,6 +1473,7 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1475,6 +1509,7 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1508,6 +1543,7 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1530,6 +1566,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1564,6 +1601,7 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1609,6 +1647,7 @@ async def 
test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1657,6 +1696,7 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1684,6 +1724,7 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1709,6 +1750,7 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1734,6 +1776,7 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1764,6 +1807,7 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1793,6 +1837,7 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", 
name="test") as transaction: @@ -1822,6 +1867,7 @@ async def test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1848,6 +1894,7 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1889,6 +1936,7 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1918,6 +1966,7 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1954,6 +2003,7 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1993,6 +2043,7 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2025,6 +2076,7 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": 
True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2060,6 +2112,7 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2094,6 +2147,7 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2122,6 +2176,7 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2149,6 +2204,7 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2172,6 +2228,7 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2191,6 +2248,7 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2212,6 +2270,7 @@ async def test_set_model_data_with_system(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + 
_experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2243,6 +2302,7 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2276,6 +2336,7 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2308,6 +2369,7 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2326,6 +2388,7 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2352,6 +2415,7 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2384,6 +2448,7 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2414,6 +2479,7 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2444,6 +2510,7 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2483,6 +2550,7 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2509,6 +2577,7 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2536,6 +2605,7 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2560,6 +2630,7 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2583,6 +2654,7 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ 
-2607,6 +2679,7 @@ async def test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2630,6 +2703,7 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2661,6 +2735,7 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2694,6 +2769,7 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2722,6 +2798,7 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2749,6 +2826,7 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2797,6 +2875,7 @@ async def test_binary_content_encoding_image(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2827,6 +2906,7 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2870,6 +2950,7 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2893,7 +2974,11 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): @pytest.mark.asyncio async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): """Test that cache_read_tokens and cache_write_tokens are tracked.""" - sentry_init(integrations=[PydanticAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[PydanticAIIntegration()], + traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, + ) items = capture_items("transaction", "span") @@ -2964,6 +3049,7 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3034,6 +3120,7 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3081,6 +3168,7 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 9b20bd24b001af3953f6d4094d6461eea2c58231 Mon Sep 17 00:00:00 2001 
From: Alexander Alderman Webb Date: Fri, 17 Apr 2026 15:17:29 +0200 Subject: [PATCH 36/84] add experimental option to langgraph tests --- tests/integrations/langgraph/test_langgraph.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index e1a3baa0a8..b70889548f 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -154,6 +154,7 @@ def test_state_graph_compile( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") graph = MockStateGraph() @@ -209,6 +210,7 @@ def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_pro integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -311,6 +313,7 @@ def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_pr integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} @@ -391,6 +394,7 @@ def test_pregel_invoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} @@ -421,6 +425,7 @@ def test_pregel_ainvoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} @@ -455,6 +460,7 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -486,6 +492,7 @@ def test_pregel_invoke_with_different_graph_names( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -529,6 +536,7 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -605,6 +613,7 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -684,6 +693,7 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_i sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -765,6 +775,7 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -849,6 +860,7 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -921,6 +933,7 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -996,6 +1009,7 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1080,6 +1094,7 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1212,6 +1227,7 @@ def test_extraction_functions_complex_scenario(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1287,6 +1303,7 @@ def test_langgraph_message_role_mapping(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1361,6 +1378,7 @@ def test_langgraph_message_truncation(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 88fc76ebaaf757b6f79db84f25eb5a1f9d5c858e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 15:00:15 +0200 Subject: [PATCH 37/84] cleanup --- .../integrations/anthropic/test_anthropic.py | 26 ++++++------ .../google_genai/test_google_genai.py | 10 
+---- .../integrations/langchain/test_langchain.py | 4 +- tests/integrations/openai/test_openai.py | 8 ++-- .../openai_agents/test_openai_agents.py | 40 +++++-------------- .../pydantic_ai/test_pydantic_ai.py | 1 + 6 files changed, 31 insertions(+), 58 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index b19cca9347..f38443bc94 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3725,7 +3725,7 @@ def test_message_with_base64_pdf(sentry_init, capture_items): "source": { "type": "base64", "media_type": "application/pdf", - "attributes": "JVBERi0xLjQKJeLj...base64pdfdata", + "data": "JVBERi0xLjQKJeLj...base64pdfdata", }, }, ], @@ -3859,7 +3859,7 @@ def test_message_with_mixed_content(sentry_init, capture_items): "source": { "type": "base64", "media_type": "image/png", - "attributes": "iVBORw0KGgo...base64imagedata", + "data": "iVBORw0KGgo...base64imagedata", }, }, { @@ -3874,7 +3874,7 @@ def test_message_with_mixed_content(sentry_init, capture_items): "source": { "type": "base64", "media_type": "application/pdf", - "attributes": "JVBERi0xLjQK...base64pdfdata", + "data": "JVBERi0xLjQK...base64pdfdata", }, }, {"type": "text", "text": "Please provide a detailed analysis."}, @@ -3941,7 +3941,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite "source": { "type": "base64", "media_type": "image/jpeg", - "attributes": "base64data1...", + "data": "base64data1...", }, }, { @@ -4017,7 +4017,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) "source": { "type": "base64", "media_type": "image/jpeg", - "attributes": "base64encodeddatahere...", + "data": "base64encodeddatahere...", }, }, ], @@ -4056,7 +4056,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it "source": { "type": "base64", "media_type": "image/jpeg", - "attributes": 
"base64encodeddatahere...", + "data": "base64encodeddatahere...", }, }, ], @@ -4106,7 +4106,7 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_items): model="claude-3-5-sonnet-20241022", ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 @@ -4158,7 +4158,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item model="claude-sonnet-4-20250514", ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 @@ -4281,7 +4281,7 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 @@ -4351,7 +4351,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( for event in stream: pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens should be total: 19 + 2846 = 2865 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 @@ -4396,7 +4396,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): model="claude-sonnet-4-20250514", ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == 
"span") assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 @@ -4460,7 +4460,7 @@ def test_cache_tokens_streaming( ): pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 @@ -4524,7 +4524,7 @@ def test_stream_messages_cache_tokens( for event in stream: pass - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = (item.payload for item in items if item.type == "span") # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ae31fe565b..62c0530c31 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -400,8 +400,6 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): config=create_test_config(), ) - # Should have both transaction and error events - assert len([item for item in items if item.type == "transaction"]) == 1 (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" @@ -1122,8 +1120,6 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli contents=["This will fail"], ) - # Should have both transaction and error events - assert len([item for item in items if item.type == "transaction"]) == 1 (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == 
"error" @@ -1352,8 +1348,6 @@ async def test_async_embed_content_error_handling( contents=["This will fail"], ) - # Should have both transaction and error events - assert len([item for item in items if item.type == "transaction"]) == 1 (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" @@ -2186,9 +2180,7 @@ def test_extract_contents_messages_dict_inline_data(): """Test extract_contents_messages with dict containing inline_data""" content_dict = { "role": "user", - "parts": [ - {"inline_data": {"attributes": b"binary_data", "mime_type": "image/gif"}} - ], + "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], } result = extract_contents_messages(content_dict) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index ef27d45767..243a059432 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -897,7 +897,7 @@ def test_langchain_error(sentry_init, capture_items): with start_transaction(), pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = next(item.payload for item in items if item.type == "event") + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" @@ -939,7 +939,7 @@ def test_span_status_error(sentry_init, capture_items): with pytest.raises(ValueError): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - error = next(item.payload for item in items if item.type == "event") + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" spans = [item.payload for item in items if item.type == "span"] diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c4d77db5c8..8263dedc70 100644 --- a/tests/integrations/openai/test_openai.py +++ 
b/tests/integrations/openai/test_openai.py @@ -1462,8 +1462,8 @@ def test_span_status_error(sentry_init, capture_items): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = (item.payload for item in items if item.type == "event") - assert event["level"] == "error" + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" @@ -1964,8 +1964,8 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["origin"] == "manual" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9e74848a04..ffcf8685a7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -204,9 +204,7 @@ async def test_agent_invocation_span_no_pii( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT ) - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -354,9 +352,7 @@ async def test_agent_invocation_span( assert result is not None assert result.final_output == "Hello, how can I help 
you?" - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -583,9 +579,7 @@ def test_agent_invocation_span_sync_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -739,9 +733,7 @@ def test_agent_invocation_span_sync( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1208,9 +1200,7 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1700,9 +1690,7 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1747,16 +1735,12 @@ async def test_error_handling(sentry_init, capture_items, test_agent): test_agent, "Test input", run_config=test_run_config ) - error_events = [item.payload for item in items if item.type == "event"] - assert len(error_events) == 1 - error_event = error_events[0] + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["exception"]["values"][0]["type"] == "Exception" assert error_event["exception"]["values"][0]["value"] == "Model Error" assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = 
transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1811,9 +1795,7 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - error_events = [item.payload for item in items if item.type == "event"] - assert len(error_events) == 1 - error_event = error_events[0] + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" @@ -3574,9 +3556,7 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - transactions = [item.payload for item in items if item.type == "transaction"] - assert len(transactions) == 1 - transaction = transactions[0] + (transaction,) = (item.payload for item in items if item.type == "transaction") spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 9faccb0a84..571d82279f 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -798,6 +798,7 @@ async def run_agent(input_text): # Verify each transaction is separate events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 for i, transaction in enumerate(events): assert transaction["transaction"] == "invoke_agent test_agent" From 08af4b4f5cde74e2f46a1dbe9ff651c27ff57658 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 17:16:28 +0200 Subject: [PATCH 38/84] remove 
experimental option --- sentry_sdk/_types.py | 2 +- sentry_sdk/client.py | 6 +- sentry_sdk/consts.py | 1 - sentry_sdk/tracing.py | 18 ++-- .../integrations/anthropic/test_anthropic.py | 53 ------------ .../google_genai/test_google_genai.py | 37 -------- .../huggingface_hub/test_huggingface_hub.py | 6 -- .../integrations/langchain/test_langchain.py | 24 ------ .../integrations/langgraph/test_langgraph.py | 18 ---- tests/integrations/litellm/test_litellm.py | 28 ------ tests/integrations/openai/test_openai.py | 42 --------- .../openai_agents/test_openai_agents.py | 32 ------- .../pydantic_ai/test_pydantic_ai.py | 85 ------------------- 13 files changed, 17 insertions(+), 335 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index baf5f6a2fd..fbb9a166b8 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -12,7 +12,6 @@ SENSITIVE_DATA_SUBSTITUTE = "[Filtered]" -BLOB_DATA_SUBSTITUTE = "[Blob substitute]" class AnnotatedValue: @@ -209,6 +208,7 @@ class SDKInfo(TypedDict): "type": Literal["check_in", "transaction"], "user": dict[str, object], "_dropped_spans": int, + "_has_gen_ai_span": bool, }, total=False, ) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 87504c94b1..fd102e0679 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1116,9 +1116,9 @@ def capture_event( if is_transaction and isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - if is_transaction and not self.options["_experiments"].get( - "gen_ai_as_v2_spans", False - ): + span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False) + + if is_transaction and not span_recorder_has_gen_ai_span: envelope.add_transaction(event_opt) elif is_transaction: split_spans = _split_gen_ai_spans(event_opt) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 82107b49ee..73e5a6d9cb 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -86,7 +86,6 @@ class CompressionAlgo(Enum): "trace_lifecycle": 
Optional[Literal["static", "stream"]], "ignore_spans": Optional[IgnoreSpansConfig], "suppress_asgi_chained_exceptions": Optional[bool], - "gen_ai_as_v2_spans": Optional[bool], }, total=False, ) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 7f2baba0c9..6c8cbb87e4 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1042,11 +1042,16 @@ def finish( return None - finished_spans = [ - span.to_json() - for span in self._span_recorder.spans - if span.timestamp is not None - ] + finished_spans = [] + has_gen_ai_span = False + for span in self._span_recorder.spans: + if span.timestamp is None: + continue + + if isinstance(span.op, str) and span.op.startswith("gen_ai."): + has_gen_ai_span = True + + finished_spans.append(span.to_json()) len_diff = len(self._span_recorder.spans) - len(finished_spans) dropped_spans = len_diff + self._span_recorder.dropped_spans @@ -1078,6 +1083,9 @@ def finish( if dropped_spans > 0: event["_dropped_spans"] = dropped_spans + if has_gen_ai_span: + event["_has_gen_ai_span"] = True + if self._profile is not None and self._profile.valid(): event["profile"] = self._profile self._profile = None diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index f38443bc94..865013f0b4 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -97,7 +97,6 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -172,7 +171,6 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -289,7 +287,6 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -398,7 +395,6 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -502,7 +498,6 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -619,7 +614,6 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -729,7 +723,6 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -838,7 +831,6 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -961,7 +953,6 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1073,7 +1064,6 @@ async def test_streaming_create_message_async_close( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1180,7 +1170,6 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1301,7 +1290,6 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1412,7 +1400,6 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1523,7 +1510,6 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1680,7 +1666,6 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1830,7 +1815,6 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1988,7 +1972,6 @@ async def test_streaming_create_message_with_input_json_delta_async( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2146,7 +2129,6 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2208,7 +2190,6 @@ def test_exception_message_create(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -2234,7 +2215,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -2264,7 +2244,6 @@ async def test_span_status_error_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -2294,7 +2273,6 @@ async def test_exception_message_create_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -2320,7 +2298,6 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2351,7 +2328,6 @@ async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -2415,7 +2391,6 @@ def test_set_output_data_with_input_json_delta(sentry_init): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with start_transaction(name="test"): @@ -2466,7 +2441,6 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2513,7 +2487,6 @@ def test_anthropic_message_truncation(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2564,7 +2537,6 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2625,7 +2597,6 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2712,7 +2683,6 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") @@ -2842,7 +2812,6 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], 
traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2973,7 +2942,6 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3106,7 +3074,6 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3239,7 +3206,6 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3315,7 +3281,6 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3569,7 +3534,6 @@ def test_message_with_base64_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3620,7 +3584,6 @@ def test_message_with_url_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = 
Anthropic(api_key="z") @@ -3664,7 +3627,6 @@ def test_message_with_file_image(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3709,7 +3671,6 @@ def test_message_with_base64_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3754,7 +3715,6 @@ def test_message_with_url_pdf(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3798,7 +3758,6 @@ def test_message_with_file_document(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3843,7 +3802,6 @@ def test_message_with_mixed_content(sentry_init, capture_items): integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -3926,7 +3884,6 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4001,7 +3958,6 @@ def 
test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4040,7 +3996,6 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4078,7 +4033,6 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4130,7 +4084,6 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4182,7 +4135,6 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4263,7 +4215,6 @@ def test_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -4333,7 +4284,6 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -4370,7 +4320,6 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -4442,7 +4391,6 @@ def test_cache_tokens_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -4506,7 +4454,6 @@ def test_stream_messages_cache_tokens( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 62c0530c31..3974041314 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -130,7 +130,6 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -220,7 +219,6 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -264,7 +262,6 @@ def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_clie sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -347,7 +344,6 @@ def test_tool_execution(sentry_init, capture_items): integrations=[GoogleGenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -384,7 +380,6 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction") @@ -414,7 +409,6 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -531,7 +525,6 @@ def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -559,7 +552,6 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -601,7 +593,6 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -666,7 +657,6 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -706,7 +696,6 @@ def test_empty_response(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -738,7 +727,6 @@ def 
test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -781,7 +769,6 @@ def test_tool_with_async_function(sentry_init): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create an async tool function @@ -804,7 +791,6 @@ def test_contents_as_none(sentry_init, capture_items, mock_genai_client): integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -831,7 +817,6 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -918,7 +903,6 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -994,7 +978,6 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1056,7 +1039,6 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1103,7 +1085,6 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1135,7 +1116,6 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1175,7 +1155,6 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1216,7 +1195,6 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1281,7 +1259,6 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1331,7 +1308,6 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "event") @@ -1364,7 +1340,6 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1407,7 +1382,6 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1439,7 +1413,6 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1476,7 +1449,6 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1509,7 +1481,6 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1559,7 +1530,6 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1605,7 +1575,6 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1660,7 +1629,6 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1704,7 +1672,6 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1743,7 +1710,6 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1780,7 +1746,6 @@ def test_generate_content_with_dict_inline_data( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1825,7 +1790,6 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1869,7 +1833,6 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index eaac8c1ab1..031627906a 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -480,7 +480,6 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -556,7 +555,6 @@ def test_text_generation_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -633,7 +631,6 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -712,7 +709,6 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, 
integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -885,7 +881,6 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -981,7 +976,6 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 243a059432..3c1d9bef54 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -108,7 +108,6 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -217,7 +216,6 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -338,7 +336,6 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -531,7 +528,6 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -869,7 +865,6 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -908,7 +903,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1036,7 +1030,6 @@ def test_langchain_callback_manager(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_manager = BaseCallbackManager(handlers=[]) @@ -1069,7 +1062,6 @@ def test_langchain_callback_manager_with_sentry_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_manager = BaseCallbackManager(handlers=[sentry_callback]) @@ -1102,7 +1094,6 @@ def test_langchain_callback_list(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) local_callbacks = [] @@ -1135,7 +1126,6 @@ def test_langchain_callback_list_existing_callback(sentry_init): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) sentry_callback = SentryLangchainCallback(0, False) local_callbacks = [sentry_callback] @@ -1173,7 +1163,6 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1311,7 +1300,6 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1404,7 +1392,6 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1483,7 +1470,6 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1558,7 +1544,6 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1631,7 +1616,6 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1693,7 +1677,6 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1779,7 +1762,6 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1837,7 +1819,6 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1878,7 +1859,6 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1942,7 +1922,6 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1996,7 +1975,6 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2061,7 +2039,6 @@ def test_langchain_response_model_extraction( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2367,7 +2344,6 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index b70889548f..e1a3baa0a8 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -154,7 +154,6 @@ def test_state_graph_compile( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") 
graph = MockStateGraph() @@ -210,7 +209,6 @@ def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_pro integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -313,7 +311,6 @@ def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_pr integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} @@ -394,7 +391,6 @@ def test_pregel_invoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail")]} @@ -425,7 +421,6 @@ def test_pregel_ainvoke_error(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") test_state = {"messages": [MockMessage("This will fail async")]} @@ -460,7 +455,6 @@ def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -492,7 +486,6 @@ def test_pregel_invoke_with_different_graph_names( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -536,7 +529,6 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, 
capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -613,7 +605,6 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -693,7 +684,6 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_i sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -775,7 +765,6 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -860,7 +849,6 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -933,7 +921,6 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1009,7 +996,6 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1094,7 +1080,6 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1227,7 +1212,6 @@ def test_extraction_functions_complex_scenario(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1303,7 +1287,6 @@ def test_langgraph_message_role_mapping(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1378,7 +1361,6 @@ def test_langgraph_message_truncation(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b9365e7008..90807744e7 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -152,7 +152,6 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -234,7 +233,6 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -318,7 +316,6 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) 
items = capture_items("span") @@ -389,7 +386,6 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -456,7 +452,6 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -526,7 +521,6 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -591,7 +585,6 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -654,7 +647,6 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -717,7 +709,6 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -774,7 +765,6 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -825,7 +815,6 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -864,7 +853,6 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -906,7 +894,6 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -954,7 +941,6 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction") @@ -1050,7 +1036,6 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1147,7 +1132,6 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1207,7 +1191,6 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1267,7 +1250,6 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1314,7 +1296,6 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1357,7 +1338,6 @@ def test_response_without_usage(sentry_init, 
capture_items): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1399,7 +1379,6 @@ def test_integration_setup(sentry_init): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Check that callbacks are registered @@ -1414,7 +1393,6 @@ def test_litellm_message_truncation(sentry_init, capture_items): integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1481,7 +1459,6 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1561,7 +1538,6 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1642,7 +1618,6 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1711,7 +1686,6 @@ async def test_async_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1781,7 +1755,6 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1855,7 +1828,6 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 8263dedc70..4b9d629d96 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -138,7 +138,6 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -234,7 +233,6 @@ def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, requ integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -314,7 +312,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -410,7 +407,6 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -506,7 +502,6 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -626,7 +621,6 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, 
send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -707,7 +701,6 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -771,7 +764,6 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -837,7 +829,6 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -966,7 +957,6 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1119,7 +1109,6 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1291,7 +1280,6 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1426,7 +1414,6 @@ def test_bad_chat_completion(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1448,7 +1435,6 @@ def test_span_status_error(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", 
"transaction", "span") @@ -1477,7 +1463,6 @@ async def test_bad_chat_completion_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1509,7 +1494,6 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1592,7 +1576,6 @@ def test_embeddings_create(sentry_init, capture_items, input, request): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1664,7 +1647,6 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1748,7 +1730,6 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -1817,7 +1798,6 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1846,7 +1826,6 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event") @@ -1867,7 +1846,6 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_items): 
sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1891,7 +1869,6 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1914,7 +1891,6 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1978,7 +1954,6 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2045,7 +2020,6 @@ def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2077,7 +2051,6 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2471,7 +2444,6 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2594,7 +2566,6 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -2805,7 +2776,6 @@ def test_error_in_responses_api(sentry_init, capture_items): 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -2912,7 +2882,6 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3198,7 +3167,6 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3424,7 +3392,6 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -3552,7 +3519,6 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3629,7 +3595,6 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3693,7 +3658,6 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3736,7 +3700,6 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3767,7 +3730,6 @@ def test_openai_message_truncation(sentry_init, capture_items): 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3817,7 +3779,6 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3896,7 +3857,6 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -3973,7 +3933,6 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -4023,7 +3982,6 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index ffcf8685a7..bde222274c 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -182,7 +182,6 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -338,7 +337,6 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -521,7 +519,6 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -569,7 +566,6 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -719,7 +715,6 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -960,7 +955,6 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1090,7 +1084,6 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1189,7 +1182,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1416,7 +1408,6 @@ async def test_hosted_mcp_tool_propagation_header_streamed( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -1579,7 +1570,6 @@ async def test_hosted_mcp_tool_propagation_headers( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, release="d08ebdb9309e1b004c6f52202de58a09c2268e42", - _experiments={"gen_ai_as_v2_spans": True}, ) response = get_model_response(EXAMPLE_RESPONSE, serialize_pydantic=True) @@ -1678,7 +1668,6 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -1725,7 +1714,6 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span", "transaction") @@ -1787,7 +1775,6 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "span") @@ -1830,7 +1817,6 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -1944,7 +1930,6 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2075,7 +2060,6 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2200,7 +2184,6 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2258,7 +2241,6 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2302,7 +2284,6 @@ def test_openai_agents_message_role_mapping( 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) get_response_kwargs = {"input": [test_message]} @@ -2402,7 +2383,6 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2500,7 +2480,6 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2594,7 +2573,6 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2683,7 +2661,6 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2806,7 +2783,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -2895,7 +2871,6 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3030,7 +3005,6 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3073,7 +3047,6 @@ def 
test_openai_agents_message_truncation(sentry_init, capture_items): integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_messages = [ @@ -3120,7 +3093,6 @@ async def test_streaming_span_update_captures_response_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a mock streaming response object (similar to what we'd get from ResponseCompletedEvent) @@ -3186,7 +3158,6 @@ async def test_streaming_ttft_on_chat_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) request_headers = {} @@ -3341,7 +3312,6 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3480,7 +3450,6 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") @@ -3544,7 +3513,6 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span", "transaction") diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 571d82279f..cfb1ca09ca 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -61,7 +61,6 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", 
"span") @@ -103,7 +102,6 @@ async def test_agent_run_async_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -137,7 +135,6 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -182,7 +179,6 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -215,7 +211,6 @@ def test_agent_run_sync_model_error(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("event", "transaction", "span") @@ -249,7 +244,6 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -294,7 +288,6 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -329,7 +322,6 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -395,7 +387,6 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, 
send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) retries = 0 @@ -479,7 +470,6 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -544,7 +534,6 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -594,7 +583,6 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -643,7 +631,6 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -689,7 +676,6 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -714,7 +700,6 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -744,7 +729,6 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -781,7 +765,6 @@ async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_a sentry_init( 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -817,7 +800,6 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -867,7 +849,6 @@ async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -898,7 +879,6 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -928,7 +908,6 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -960,7 +939,6 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -998,7 +976,6 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1090,7 +1067,6 @@ async def mock_map_tool_result_part(part): 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1160,7 +1136,6 @@ async def test_context_cleanup_after_run(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1184,7 +1159,6 @@ def test_context_cleanup_after_run_sync(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1209,7 +1183,6 @@ async def test_context_cleanup_after_streaming(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Verify context is not set before run @@ -1236,7 +1209,6 @@ async def test_context_cleanup_on_error(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1271,7 +1243,6 @@ async def test_context_isolation_concurrent_agents(sentry_init, get_test_agent): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a second agent @@ -1327,7 +1298,6 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1378,7 +1348,6 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = 
capture_items("transaction", "span") @@ -1418,7 +1387,6 @@ async def test_model_name_extraction_with_callable(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1445,7 +1413,6 @@ async def test_model_name_extraction_fallback_to_str(sentry_init, capture_items) sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Test the utility function directly @@ -1474,7 +1441,6 @@ async def test_model_settings_object_style(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1510,7 +1476,6 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1544,7 +1509,6 @@ async def test_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1567,7 +1531,6 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1602,7 +1565,6 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) test_agent = get_test_agent() @@ -1648,7 +1610,6 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item 
integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1697,7 +1658,6 @@ async def test_update_invoke_agent_span_with_none_output(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1725,7 +1685,6 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1751,7 +1710,6 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1777,7 +1735,6 @@ async def test_model_response_without_parts(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1808,7 +1765,6 @@ async def test_input_messages_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1838,7 +1794,6 @@ async def test_available_tools_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1868,7 +1823,6 @@ async def 
test_set_usage_data_with_none_usage(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1895,7 +1849,6 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -1937,7 +1890,6 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -1967,7 +1919,6 @@ async def test_message_parts_with_list_content(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2004,7 +1955,6 @@ async def test_output_data_with_text_and_tool_calls(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2044,7 +1994,6 @@ async def test_output_data_error_handling(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2077,7 +2026,6 @@ async def test_message_with_system_prompt_part(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", 
name="test") as transaction: @@ -2113,7 +2061,6 @@ async def test_message_with_instructions(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2148,7 +2095,6 @@ async def test_set_input_messages_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2177,7 +2123,6 @@ async def test_set_output_data_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2205,7 +2150,6 @@ async def test_get_model_name_with_exception_in_callable(sentry_init, capture_it sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create model with callable name that raises exception @@ -2229,7 +2173,6 @@ async def test_get_model_name_with_string_model(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass a string as model @@ -2249,7 +2192,6 @@ async def test_get_model_name_with_none(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Pass None @@ -2271,7 +2213,6 @@ async def test_set_model_data_with_system(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with 
sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2303,7 +2244,6 @@ async def test_set_model_data_from_agent_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2337,7 +2277,6 @@ async def test_set_model_data_with_none_settings_values(sentry_init, capture_ite sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2370,7 +2309,6 @@ async def test_should_send_prompts_without_pii(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"gen_ai_as_v2_spans": True}, ) # Should return False @@ -2389,7 +2327,6 @@ async def test_set_agent_data_without_agent(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2416,7 +2353,6 @@ async def test_set_agent_data_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2449,7 +2385,6 @@ async def test_set_agent_data_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2480,7 +2415,6 @@ async def test_set_available_tools_without_toolset(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], 
traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2511,7 +2445,6 @@ async def test_set_available_tools_with_schema(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2551,7 +2484,6 @@ async def test_execute_tool_span_creation(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2578,7 +2510,6 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2606,7 +2537,6 @@ async def test_execute_tool_span_without_prompts(sentry_init, capture_items): integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2631,7 +2561,6 @@ async def test_execute_tool_span_with_none_args(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2655,7 +2584,6 @@ async def test_update_execute_tool_span_with_none_span(sentry_init, capture_item sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Update with None span - should not raise @@ -2680,7 +2608,6 @@ async def 
test_update_execute_tool_span_with_none_result(sentry_init, capture_it integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2704,7 +2631,6 @@ async def test_tool_execution_without_span_context(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) # Create a simple agent with no tools (won't have function_toolset) @@ -2736,7 +2662,6 @@ async def test_invoke_agent_span_with_callable_instruction(sentry_init, capture_ integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2770,7 +2695,6 @@ async def test_invoke_agent_span_with_string_instructions(sentry_init, capture_i integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2799,7 +2723,6 @@ async def test_ai_client_span_with_streaming_flag(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2827,7 +2750,6 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) with sentry_sdk.start_transaction(op="test", name="test") as transaction: @@ -2876,7 +2798,6 @@ async def test_binary_content_encoding_image(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2907,7 +2828,6 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2951,7 +2871,6 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -2978,7 +2897,6 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3050,7 +2968,6 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") @@ -3121,7 +3038,6 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) agent = Agent("test", name="test_image_url_agent") @@ -3169,7 +3085,6 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("transaction", "span") From 7bd12aef87cd69e975cb6c383f84d715f07aa1d7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 18:24:43 +0200 Subject: [PATCH 39/84] add constant again --- sentry_sdk/_types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index fbb9a166b8..814b90c440 100644 --- 
a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -12,6 +12,7 @@ SENSITIVE_DATA_SUBSTITUTE = "[Filtered]" +BLOB_DATA_SUBSTITUTE = "[Blob substitute]" class AnnotatedValue: From ef843a0569c2c09381248ebc9b8901ca0dcbe8d8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 18:39:10 +0200 Subject: [PATCH 40/84] add name fallback --- sentry_sdk/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index fd102e0679..316bc8877a 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -175,6 +175,10 @@ def _serialized_v1_span_to_serialized_v2_span( if "description" in span: res["name"] = span["description"] + elif ( + "op" in span + ): # fallback based on observed downstream fallback for transactions + res["name"] = span["op"] if "start_timestamp" in span: start_timestamp = None From 4e3e2d01bde97811466eca1d206f56066c50be8d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 20 Apr 2026 19:11:42 +0200 Subject: [PATCH 41/84] remove remaining experimental option references --- tests/tracing/test_decorator.py | 3 --- tests/tracing/test_misc.py | 1 - 2 files changed, 4 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index d370b4bbc9..a71ca5588f 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -124,7 +124,6 @@ async def _some_function_traced(a, b, c): def test_span_templates_ai_dicts(sentry_init, capture_items): sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -246,7 +245,6 @@ def my_agent(): def test_span_templates_ai_objects(sentry_init, capture_items): sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") @@ -374,7 +372,6 @@ def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): sentry_init( traces_sample_rate=1.0, 
send_default_pii=send_default_pii, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 4209a02b4b..0e35668b48 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -652,7 +652,6 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( """Span with gen_ai.* op should get conversation_id.""" sentry_init( traces_sample_rate=1.0, - _experiments={"gen_ai_as_v2_spans": True}, ) items = capture_items("span") From 44b2c2d952c996f5f5055ed1c028dd46f184b9ac Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 21 Apr 2026 09:58:23 +0200 Subject: [PATCH 42/84] update test with hardcoded version --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 031627906a..3c79ca7262 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -520,7 +520,7 @@ def test_text_generation( "sentry.origin": "auto.ai.huggingface_hub", "sentry.release": mock.ANY, "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": "2.58.0", + "sentry.sdk.version": mock.ANY, "sentry.segment.id": mock.ANY, "sentry.segment.name": "test", "thread.id": mock.ANY, From 307db734e3e61eda241a427a0cc60d912865ab82 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 11 May 2026 14:46:40 +0200 Subject: [PATCH 43/84] merge fixes --- tests/integrations/openai/test_openai.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 41bd7f2d51..17d80da1b5 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1500,9 +1500,6 @@ def test_span_status_error(sentry_init, 
capture_items): spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["status"] == "internal_error" - @pytest.mark.asyncio async def test_bad_chat_completion_async(sentry_init, capture_items): From efc37e1864dd87ddacd27c7d4bf6fc83682cc60d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Mon, 11 May 2026 14:59:36 +0200 Subject: [PATCH 44/84] adapt new test --- tests/integrations/openai/test_openai.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 17d80da1b5..d5e78bad99 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -2852,13 +2852,13 @@ def test_ai_client_span_responses_api( ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_responses_api_conversation_id( - sentry_init, capture_events, conversation, expected_id + sentry_init, capture_items, conversation, expected_id ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2870,13 +2870,12 @@ def test_responses_api_conversation_id( conversation=conversation, ) - (transaction,) = events - (span,) = transaction["spans"] + (span,) = (item.payload for item in items if item.type == "span") if expected_id is None: - assert "gen_ai.conversation.id" not in span["data"] + assert "gen_ai.conversation.id" not in span["attributes"] else: - assert span["data"]["gen_ai.conversation.id"] == expected_id + assert span["attributes"]["gen_ai.conversation.id"] == expected_id @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") From 
bee63202465ec87a6bfff74b8a75366d041e9342 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:05:54 +0200 Subject: [PATCH 45/84] add parameter --- sentry_sdk/tracing.py | 19 +- .../integrations/anthropic/test_anthropic.py | 4539 ++++++++++++----- .../google_genai/test_google_genai.py | 2062 ++++++-- .../huggingface_hub/test_huggingface_hub.py | 1330 +++-- .../integrations/langchain/test_langchain.py | 3869 ++++++++++---- .../integrations/langgraph/test_langgraph.py | 1503 ++++-- tests/integrations/litellm/test_litellm.py | 2367 ++++++--- tests/integrations/openai/test_openai.py | 3531 +++++++++---- .../openai_agents/test_openai_agents.py | 3545 +++++++++---- .../pydantic_ai/test_pydantic_ai.py | 2498 ++++++--- 10 files changed, 18393 insertions(+), 6870 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 9aab29996d..96029f1f58 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1042,14 +1042,21 @@ def finish( finished_spans = [] has_gen_ai_span = False - for span in self._span_recorder.spans: - if span.timestamp is None: - continue + if client.options["_experiments"].get("stream_gen_ai_spans", False): + for span in self._span_recorder.spans: + if span.timestamp is None: + continue - if isinstance(span.op, str) and span.op.startswith("gen_ai."): - has_gen_ai_span = True + if isinstance(span.op, str) and span.op.startswith("gen_ai."): + has_gen_ai_span = True - finished_spans.append(span.to_json()) + finished_spans.append(span.to_json()) + else: + finished_spans = [ + span.to_json() + for span in self._span_recorder.spans + if span.timestamp is not None + ] len_diff = len(self._span_recorder.spans) - len(finished_spans) dropped_spans = len_diff + self._span_recorder.dropped_spans diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 865013f0b4..1378f777df 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ 
b/tests/integrations/anthropic/test_anthropic.py @@ -81,6 +81,7 @@ async def __call__(self, *args, **kwargs): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -91,14 +92,20 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_create_message( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -109,51 +116,106 @@ def test_nonstreaming_create_message( } ] - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert response == EXAMPLE_MESSAGE - usage = response.usage + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + 
(span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "end_turn" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + events = capture_events() + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -165,14 +227,20 @@ def test_nonstreaming_create_message( ], ) async def test_nonstreaming_create_message_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -183,50 +251,102 @@ async def test_nonstreaming_create_message_async( } ] - with start_transaction(name="anthropic"): - response = await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert response == EXAMPLE_MESSAGE - usage = response.usage + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert response == EXAMPLE_MESSAGE + usage = response.usage - (event,) 
= (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + events = capture_events() + + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -238,11 +358,13 @@ async def test_nonstreaming_create_message_async( ) def test_streaming_create_message( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -287,8 +409,8 @@ def test_streaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -297,12 +419,14 @@ def test_streaming_create_message( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True 
) @@ -310,47 +434,102 @@ def test_streaming_create_message( for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
- + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "max_tokens" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_streaming_create_message_close( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -395,8 +574,8 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -405,12 +584,14 @@ def test_streaming_create_message_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): messages = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -420,45 +601,92 @@ def test_streaming_create_message_close( messages.close() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == 
"span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + messages = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + for _ in range(4): + next(messages) + + messages.close() + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", ) def test_streaming_create_message_api_error( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -498,8 +726,8 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -508,52 +736,99 @@ def test_streaming_create_message_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) for _ in message: pass + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + spans = [item.payload for item in items if item.type == "span"] + 
span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "error" + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -565,11 +840,13 @@ def test_streaming_create_message_api_error( ) def test_stream_messages( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -614,8 +891,8 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -624,61 +901,116 @@ def test_stream_messages( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + 
(event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" - + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "max_tokens" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == 
"chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_stream_messages_close( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -723,8 +1055,8 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -733,65 +1065,117 @@ def test_stream_messages_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for _ in range(4): - next(stream) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # New versions add TextEvent, so consume one more event. 
- if TextEvent is not None and isinstance(next(stream), TextEvent): - next(stream) + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for _ in range(4): + next(stream) - stream.close() + # New versions add TextEvent, so consume one more event. + if TextEvent is not None and isinstance(next(stream), TextEvent): + next(stream) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + stream.close() - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for _ in range(4): + next(stream) + + # New versions add TextEvent, so consume one more event. 
+ if TextEvent is not None and isinstance(next(stream), TextEvent): + next(stream) + stream.close() + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", ) def test_stream_messages_api_error( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -831,8 +1215,8 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -841,53 +1225,100 @@ def test_stream_messages_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", 
- return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass + + assert len(events) == 1 + (event,) = events - assert span["status"] == "error" + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -900,12 +1331,14 @@ def test_stream_messages_api_error( ) async def test_streaming_create_message_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -953,8 +1386,8 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -963,12 +1396,14 @@ async def test_streaming_create_message_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -976,48 +1411,104 @@ async def test_streaming_create_message_async( async for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") 
- assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! 
I'm Claude!" - + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "max_tokens" + ] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + async for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + assert len(event["spans"]) == 1 + (span,) = event["spans"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_streaming_create_message_async_close( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1064,8 +1555,8 @@ async def test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1074,12 +1565,58 @@ async def test_streaming_create_message_async_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + messages = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + for _ in range(4): + await messages.__anext__() + await messages.close() + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + span = next( + 
span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): messages = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1088,36 +1625,36 @@ async def test_streaming_create_message_async_close( await messages.__anext__() await messages.close() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + assert len(events) == 1 + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", @@ -1125,10 +1662,12 @@ async def test_streaming_create_message_async_close( @pytest.mark.asyncio async def test_streaming_create_message_async_api_error( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1170,8 +1709,8 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1180,12 +1719,14 @@ async def test_streaming_create_message_async_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1193,39 +1734,85 @@ async def test_streaming_create_message_async_api_error( async for _ in message: pass - (event,) = (item.payload for item in items if item.type == 
"transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + async for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "error" + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1238,12 +1825,14 @@ async def test_streaming_create_message_async_api_error( ) async def test_stream_message_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1290,8 +1879,8 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1300,12 +1889,14 @@ async def test_stream_message_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -1314,40 +1905,91 @@ async def test_stream_message_async( async for event in stream: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert 
event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
- else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + async for event in stream: + pass + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 41), reason="Error classes moved in https://github.com/anthropics/anthropic-sdk-python/commit/4e0b15e22fe40e9aa513459564f641bf97c90954.", @@ -1355,10 +1997,12 @@ async def test_stream_message_async( @pytest.mark.asyncio async def test_stream_messages_async_api_error( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1400,8 +2044,8 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1410,12 +2054,14 @@ async def test_stream_messages_async_api_error( } ] - with pytest.raises(APIStatusError), mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -1424,46 +2070,95 @@ async def test_stream_messages_async_api_error( async for event in stream: pass - (event,) = 
(item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "error" + assert span["status"] == "error" + else: + events = capture_events() + + with pytest.raises(APIStatusError), mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + async for event in stream: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" assert event["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_stream_messages_async_close( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") @@ -1510,8 +2205,8 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1520,12 +2215,14 @@ async def test_stream_messages_async_close( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -1542,36 +2239,88 @@ async def test_stream_messages_async_close( await stream.close() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - span = 
next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] - == "msg_01XFDUDYJgAACzvnptvVoYEL" - ) + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for _ in range(4): + await stream.__anext__() + + # New versions add TextEvent, so consume one more event. + if TextEvent is not None and isinstance( + await stream.__anext__(), TextEvent + ): + await stream.__anext__() + + await stream.close() + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "Hello, Claude"}]' + ) + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
+ + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), reason="Versions <0.27.0 do not include InputJSONDelta, which was introduced in >=0.27.0 along with a new message delta type for tool calling.", @@ -1587,11 +2336,13 @@ async def test_stream_messages_async_close( ) def test_streaming_create_message_with_input_json_delta( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -1666,8 +2417,8 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1676,12 +2427,14 @@ def test_streaming_create_message_with_input_json_delta( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1689,38 +2442,87 @@ def test_streaming_create_message_with_input_json_delta( for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == 
"anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), 
reason="Versions <0.27.0 do not include InputJSONDelta, which was introduced in >=0.27.0 along with a new message delta type for tool calling.", @@ -1736,11 +2538,13 @@ def test_streaming_create_message_with_input_json_delta( ) def test_stream_messages_with_input_json_delta( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): client = Anthropic(api_key="z") @@ -1815,8 +2619,8 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1825,52 +2629,101 @@ def test_stream_messages_with_input_json_delta( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + pass - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + for event in stream: + 
pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), @@ -1887,12 +2740,14 @@ def test_stream_messages_with_input_json_delta( ) async def test_streaming_create_message_with_input_json_delta_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") response = get_model_response( @@ -1972,8 +2827,8 @@ async def 
test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1982,12 +2837,14 @@ async def test_streaming_create_message_with_input_json_delta_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, model="model", stream=True ) @@ -1995,39 +2852,88 @@ async def test_streaming_create_message_with_input_json_delta_async( async for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, messages=messages, model="model", stream=True + ) + + async for _ in message: + pass + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] 
== "model" + + if send_default_pii and include_prompts: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif( ANTHROPIC_VERSION < (0, 27), @@ -2044,12 +2950,14 @@ async def test_streaming_create_message_with_input_json_delta_async( ) async def test_stream_message_with_input_json_delta_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): client = AsyncAnthropic(api_key="z") response = get_model_response( @@ -2129,8 +3037,8 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2139,12 +3047,14 @@ async def test_stream_message_with_input_json_delta_async( } ] - with mock.patch.object( - client._client, - "send", - 
return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): async with client.messages.stream( max_tokens=1024, messages=messages, @@ -2153,76 +3063,132 @@ async def test_stream_message_with_input_json_delta_async( async for event in stream: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" - - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the 
weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + events = capture_events() + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + ) as stream: + async for event in stream: + pass -def test_exception_message_create(sentry_init, capture_items): + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + if stream_gen_ai_spans: + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", 
"content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + else: + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_exception_message_create( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction") client = Anthropic(api_key="z") client.messages._post = mock.Mock( side_effect=AnthropicError("API rate limit reached") ) - with pytest.raises(AnthropicError): - client.messages.create( - model="some-model", - messages=[{"role": "system", "content": "I'm throwing an exception"}], - max_tokens=1024, - ) - (event,) = (item.payload for item in items if item.type == "event") - assert event["level"] == "error" + if stream_gen_ai_spans: + items = capture_items("event", "transaction") - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["contexts"]["trace"]["status"] == "internal_error" + with pytest.raises(AnthropicError): + client.messages.create( + model="some-model", + messages=[{"role": "system", "content": "I'm throwing an exception"}], + 
max_tokens=1024, + ) + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_items): - sentry_init( - integrations=[AnthropicIntegration()], - traces_sample_rate=1.0, - ) - items = capture_items("event", "span") + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() - with start_transaction(name="anthropic"): - client = Anthropic(api_key="z") - client.messages._post = mock.Mock( - side_effect=AnthropicError("API rate limit reached") - ) with pytest.raises(AnthropicError): client.messages.create( model="some-model", @@ -2230,76 +3196,195 @@ def test_span_status_error(sentry_init, capture_items): max_tokens=1024, ) - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + (event, transaction) = events + assert event["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert transaction["contexts"]["trace"]["status"] == "internal_error" -@pytest.mark.asyncio -async def test_span_status_error_async(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_status_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "span") + if stream_gen_ai_spans: + items = capture_items("event", "span") - with start_transaction(name="anthropic"): - client = AsyncAnthropic(api_key="z") - client.messages._post = AsyncMock( - side_effect=AnthropicError("API rate limit reached") - ) - with 
pytest.raises(AnthropicError): - await client.messages.create( - model="some-model", - messages=[{"role": "system", "content": "I'm throwing an exception"}], - max_tokens=1024, + with start_transaction(name="anthropic"): + client = Anthropic(api_key="z") + client.messages._post = mock.Mock( + side_effect=AnthropicError("API rate limit reached") ) + with pytest.raises(AnthropicError): + client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" - - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + with start_transaction(name="anthropic"): + client = Anthropic(api_key="z") + client.messages._post = mock.Mock( + side_effect=AnthropicError("API rate limit reached") + ) + with pytest.raises(AnthropicError): + client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" + assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert 
transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +@pytest.mark.asyncio +async def test_span_status_error_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): + sentry_init( + integrations=[AnthropicIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + if stream_gen_ai_spans: + items = capture_items("event", "span") + with start_transaction(name="anthropic"): + client = AsyncAnthropic(api_key="z") + client.messages._post = AsyncMock( + side_effect=AnthropicError("API rate limit reached") + ) + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() + with start_transaction(name="anthropic"): + client = AsyncAnthropic(api_key="z") + client.messages._post = AsyncMock( + side_effect=AnthropicError("API rate limit reached") + ) + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[ + {"role": "system", "content": "I'm throwing an exception"} + ], + max_tokens=1024, + ) + + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" + assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == 
"chat" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_exception_message_create_async(sentry_init, capture_items): +async def test_exception_message_create_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock( side_effect=AnthropicError("API rate limit reached") ) - with pytest.raises(AnthropicError): - await client.messages.create( - model="some-model", - messages=[{"role": "system", "content": "I'm throwing an exception"}], - max_tokens=1024, - ) - (event,) = (item.payload for item in items if item.type == "event") - assert event["level"] == "error" + if stream_gen_ai_spans: + items = capture_items("event", "transaction") + + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[{"role": "system", "content": "I'm throwing an exception"}], + max_tokens=1024, + ) + + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with pytest.raises(AnthropicError): + await client.messages.create( + model="some-model", + messages=[{"role": "system", "content": "I'm throwing an exception"}], + max_tokens=1024, + ) - (transaction,) = (item.payload for item in items if item.type == "transaction") + (event, transaction) = events + assert event["level"] == "error" assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin( + sentry_init, + capture_events, + capture_items, + 
stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2311,25 +3396,45 @@ def test_span_origin(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + (event,) = events + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_span_origin_async(sentry_init, capture_items): +async def test_span_origin_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2341,16 +3446,35 @@ async def test_span_origin_async(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - await client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + else: + events = capture_events() + + with start_transaction(name="anthropic"): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + 
assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.skipif( @@ -2418,6 +3542,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): # Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "test_message,expected_role", [ @@ -2434,15 +3559,20 @@ def test_set_output_data_with_input_json_delta(sentry_init): ], ) def test_anthropic_message_role_mapping( - sentry_init, capture_items, test_message, expected_role + sentry_init, + capture_events, + capture_items, + test_message, + expected_role, + stream_gen_ai_spans, ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2462,33 +3592,63 @@ def mock_messages_create(*args, **kwargs): test_messages = [test_message] - with start_transaction(name="anthropic tx"): - client.messages.create( - model="claude-3-opus", max_tokens=10, messages=test_messages + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-opus", max_tokens=10, messages=test_messages + ) + + span = next(item.payload for item in items if item.type == "span") + + # Verify that the span was created correctly + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + 
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + + # Parse the stored messages + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] ) + else: + events = capture_events() + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-opus", max_tokens=10, messages=test_messages + ) - span = next(item.payload for item in items if item.type == "span") + (event,) = events + span = event["spans"][0] - # Verify that the span was created correctly - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + # Verify that the span was created correctly + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - # Parse the stored messages - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + # Parse the stored messages + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert stored_messages[0]["role"] == expected_role -def test_anthropic_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_anthropic_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2504,41 +3664,83 
@@ def test_anthropic_message_truncation(sentry_init, capture_items): {"role": "user", "content": "small message 5"}, ] - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + assert len(chat_spans) > 0 - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + chat_span = chat_spans[0] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + 
with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_anthropic_message_truncation_async(sentry_init, capture_items): +async def test_anthropic_message_truncation_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2554,21 +3756,44 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): {"role": "user", "content": "small message 5"}, ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(): await 
client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ] + else: + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + if stream_gen_ai_spans: + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2576,10 +3801,14 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") + if stream_gen_ai_spans: + tx = next(item.payload for item in items if item.type == 
"transaction") + else: + pass assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2590,15 +3819,21 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_items): ], ) def test_nonstreaming_create_message_with_system_prompt( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2609,6 +3844,11 @@ def test_nonstreaming_create_message_with_system_prompt( } ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="anthropic"): response = client.messages.create( max_tokens=1024, @@ -2623,48 +3863,103 @@ def test_nonstreaming_create_message_with_system_prompt( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT 
- assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "end_turn" + ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert len(event["spans"]) == 1 + (span,) = event["spans"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + if stream_gen_ai_spans: + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -2676,15 +3971,21 @@ def test_nonstreaming_create_message_with_system_prompt( ], ) async def test_nonstreaming_create_message_with_system_prompt_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2695,6 +3996,11 @@ 
async def test_nonstreaming_create_message_with_system_prompt_async( } ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="anthropic"): response = await client.messages.create( max_tokens=1024, @@ -2709,48 +4015,95 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "end_turn" + ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
+ else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2762,11 +4115,13 @@ async def test_nonstreaming_create_message_with_system_prompt_async( ) def test_streaming_create_message_with_system_prompt( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode.""" client = Anthropic(api_key="z") @@ -2812,8 +4167,8 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2822,12 +4177,76 @@ def test_streaming_create_message_with_system_prompt( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) + + for _ in message: + 
pass + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = client.messages.create( max_tokens=1024, messages=messages, @@ -2839,48 +4258,49 @@ def test_streaming_create_message_with_system_prompt( for _ in message: pass - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + assert len(events) == 1 + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert len(event["spans"]) == 1 + (span,) = event["spans"] - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] - else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
- assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2892,11 +4312,13 @@ def test_streaming_create_message_with_system_prompt( ) def test_stream_messages_with_system_prompt( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode.""" client = Anthropic(api_key="z") @@ -2942,8 +4364,8 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2952,63 +4374,119 @@ def test_stream_messages_with_system_prompt( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) as stream: - for event in stream: - pass - - (event,) = (item.payload for item in items 
if item.type == "transaction") - assert event["transaction"] == "anthropic" + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + for event in stream: + pass - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + for 
event in stream: + pass + + assert len(events) == 1 + (event,) = events - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + assert len(event["spans"]) == 1 + (span,) = event["spans"] + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -3021,12 +4499,14 @@ def test_stream_messages_with_system_prompt( ) async def test_stream_message_with_system_prompt_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode (async).""" client = AsyncAnthropic(api_key="z") @@ -3074,8 +4554,8 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -3084,63 +4564,111 @@ async def test_stream_message_with_system_prompt_async( } ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with mock.patch.object( client._client, "send", return_value=response, - ) as _: - with start_transaction(name="anthropic"): - async with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) as stream: - async for event in stream: - pass - - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + ) as _, 
start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + async for event in stream: + pass - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -3153,12 +4681,14 @@ async def test_stream_message_with_system_prompt_async( ) async def test_streaming_create_message_with_system_prompt_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """Test that system prompts are properly captured in streaming mode (async).""" client = AsyncAnthropic(api_key="z") @@ -3206,8 +4736,8 @@ async def test_streaming_create_message_with_system_prompt_async( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -3216,12 +4746,14 @@ async def test_streaming_create_message_with_system_prompt_async( } ] - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): message = await client.messages.create( max_tokens=1024, messages=messages, @@ -3232,57 +4764,126 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "anthropic" + async for _ in message: + pass - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "anthropic" - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat model" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + spans = [item.payload for item in items if item.type == "span"] + assert 
len(spans) == 1 + (span,) = spans - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."} - ] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - assert stored_messages[0]["content"] == "Hello, Claude" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
+ ) + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert len(events) == 1 + (event,) = events - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + assert len(event["spans"]) == 1 + (span,) = event["spans"] -def test_system_prompt_with_complex_structure(sentry_init, capture_items): + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert 
len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_system_prompt_with_complex_structure( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that complex system prompt structures (list of text blocks) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3299,34 +4900,72 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, messages=messages, model="model", system=system_prompt + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model", system=system_prompt + ) + + assert response == EXAMPLE_MESSAGE + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans + + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert 
span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) - assert response == EXAMPLE_MESSAGE + # System content should be a list of text blocks + assert isinstance(system_instructions, list) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (span,) = spans + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model", system=system_prompt + ) - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert response == EXAMPLE_MESSAGE - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] - system_instructions = json.loads( - span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + assert len(events) == 1 + (event,) = events - # System content should be a list of text blocks - assert isinstance(system_instructions, list) - assert system_instructions == [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + assert len(event["spans"]) == 1 + (span,) = event["spans"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + assert 
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + + # System content should be a list of text blocks + assert isinstance(system_instructions, list) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3528,14 +5167,21 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -def test_message_with_base64_image(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_base64_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3556,14 +5202,31 @@ def test_message_with_base64_image(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3578,14 +5241,21 @@ def test_message_with_base64_image(sentry_init, capture_items): } -def test_message_with_url_image(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_url_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with URL-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3605,13 +5275,30 @@ def test_message_with_url_image(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3621,14 +5308,21 @@ def test_message_with_url_image(sentry_init, capture_items): } -def test_message_with_file_image(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_file_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with file_id-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3649,13 +5343,30 @@ def test_message_with_file_image(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for 
item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3665,14 +5376,21 @@ def test_message_with_file_image(sentry_init, capture_items): } -def test_message_with_base64_pdf(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_base64_pdf( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3693,13 +5411,30 @@ def test_message_with_base64_pdf(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + 
client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -3709,14 +5444,21 @@ def test_message_with_base64_pdf(sentry_init, capture_items): } -def test_message_with_url_pdf(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_url_pdf( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with URL-referenced PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3736,13 +5478,30 @@ def test_message_with_url_pdf(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + 
(span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3752,14 +5511,21 @@ def test_message_with_url_pdf(sentry_init, capture_items): } -def test_message_with_file_document(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_file_document( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with file_id-referenced documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3780,13 +5546,30 @@ def test_message_with_file_document(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - 
stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3796,14 +5579,21 @@ def test_message_with_file_document(sentry_init, capture_items): } -def test_message_with_mixed_content(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_mixed_content( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3840,13 +5630,30 @@ def test_message_with_mixed_content(sentry_init, capture_items): } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = 
stored_messages[0]["content"] assert len(content) == 5 @@ -3878,14 +5685,21 @@ def test_message_with_mixed_content(sentry_init, capture_items): } -def test_message_with_multiple_images_different_formats(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_message_with_multiple_images_different_formats( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3921,13 +5735,30 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 @@ 
-3952,14 +5783,21 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_ite assert content[3] == {"type": "text", "text": "Compare these three images."} -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_binary_content_not_stored_when_pii_disabled( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that binary content is not stored when send_default_pii is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3980,24 +5818,46 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items) } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] -def 
test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_binary_content_not_stored_when_prompts_disabled( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that binary content is not stored when include_prompts is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -4018,23 +5878,45 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_it } ] - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans + + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] -def test_cache_tokens_nonstreaming(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_cache_tokens_nonstreaming( + sentry_init, + 
capture_events, + capture_items, + stream_gen_ai_spans, +): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4053,23 +5935,49 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_items): ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) + + (span,) = (item.payload for item in items if item.type == "span") + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) - (span,) = (item.payload for item in items if item.type == "span") - # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_input_tokens_include_cache_write_nonstreaming( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). 
@@ -4084,8 +5992,9 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4104,23 +6013,53 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_item ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 3+3?"}], - model="claude-sonnet-4-20250514", + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 3+3?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = (item.payload for item in items if item.type == "span") + + # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 ) + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 3+3?"}], + model="claude-sonnet-4-20250514", + ) - (span,) = (item.payload for item in items if item.type == "span") + (span,) = events[0]["spans"] - # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 + # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 -def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_input_tokens_include_cache_read_nonstreaming( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). @@ -4135,8 +6074,9 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4155,27 +6095,52 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 5+5?"}], - model="claude-sonnet-4-20250514", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = [item.payload for item in items if item.type == "span"] + + # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 + assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + else: + events = capture_events() + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) - (span,) = [item.payload for item in items if item.type == "span"] + (span,) = events[0]["spans"] - # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_input_tokens_include_cache_read_streaming( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming). 
@@ -4215,15 +6180,42 @@ def test_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + for _ in client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + stream=True, + ): + pass + + (span,) = (item.payload for item in items if item.type == "span") + + # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): for _ in client.messages.create( max_tokens=1024, messages=[{"role": "user", "content": "What is 5+5?"}], @@ -4232,20 +6224,23 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = (item.payload for item in items if item.type == "span") + (span,) = events[0]["spans"] - # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - 
assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + # input_tokens should be total: 19 + 2846 = test_stream_messages_input_tokens_include_cache_read_streaming + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming). @@ -4284,33 +6279,64 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 5+5?"}], - model="claude-sonnet-4-20250514", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) as stream: + for event in stream: + pass + + (span,) = (item.payload for item in items if item.type == "span") + + # input_tokens should be total: 19 + 2846 = 2865 + 
assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert ( + span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 + ) # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) as stream: + for event in stream: + pass - (span,) = (item.payload for item in items if item.type == "span") + (span,) = events[0]["spans"] - # input_tokens should be total: 19 + 2846 = 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + # input_tokens should be total: 19 + 2846 = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 -def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_input_tokens_unchanged_without_caching( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that input_tokens is unchanged when there are no cached tokens. 
@@ -4320,8 +6346,9 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4338,24 +6365,44 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): ) ) - with start_transaction(name="anthropic"): - client.messages.create( - max_tokens=1024, - messages=[{"role": "user", "content": "What is 2+2?"}], - model="claude-sonnet-4-20250514", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = (item.payload for item in items if item.type == "span") + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + else: + events = capture_events() - (span,) = (item.payload for item in items if item.type == "span") + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = events[0]["spans"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_cache_tokens_streaming( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Test cache tokens are tracked for 
streaming responses.""" client = Anthropic(api_key="z") @@ -4391,15 +6438,40 @@ def test_cache_tokens_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + for _ in client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + stream=True, + ): + pass + + (span,) = (item.payload for item in items if item.type == "span") + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): for _ in client.messages.create( max_tokens=1024, messages=[{"role": "user", "content": "Hello"}], @@ -4408,17 +6480,23 @@ def test_cache_tokens_streaming( ): pass - (span,) = (item.payload for item in items if item.type == "span") - # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] 
== 210 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_stream_messages_cache_tokens( - sentry_init, capture_items, get_model_response, server_side_event_chunks + sentry_init, + capture_events, + capture_items, + get_model_response, + server_side_event_chunks, + stream_gen_ai_spans, ): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") @@ -4454,27 +6532,50 @@ def test_stream_messages_cache_tokens( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _: - with start_transaction(name="anthropic"): - with client.messages.stream( - max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", - ) as stream: - for event in stream: - pass + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) as stream: + for event in stream: + pass + + (span,) = (item.payload for item in 
items if item.type == "span") + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"), client.messages.stream( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) as stream: + for event in stream: + pass - (span,) = (item.payload for item in items if item.type == "span") - # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 3974041314..3cc4b42bb2 100644 --- a/tests/integrations/google_genai/test_google_genai.py 
+++ b/tests/integrations/google_genai/test_google_genai.py @@ -114,6 +114,7 @@ def create_test_config( return genai_types.GenerateContentConfig(**config_dict) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -124,61 +125,123 @@ def create_test_config( ], ) def test_nonstreaming_generate_content( - sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, - "request", - return_value=mock_http_response, - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai"): config = create_test_config(temperature=0.7, max_output_tokens=100) mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Tell me a joke", config=config ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "google_genai" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "google_genai" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - chat_span = next(item.payload for item in items if item.type == "span") + spans = [item.payload for item 
in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") - # Check chat span - assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert chat_span["name"] == "chat gemini-1.5-flash" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + # Check chat span + assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert chat_span["name"] == "chat gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + ) - if send_default_pii and include_prompts: - # Response text is stored as a JSON array - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - # Parse the JSON array - response_texts = json.loads(response_text) - assert response_texts == ["Hello! How can I help you today?"] + if send_default_pii and include_prompts: + # Response text is stored as a JSON array + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Parse the JSON array + response_texts = json.loads(response_text) + assert response_texts == ["Hello! 
How can I help you today?"] + else: + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] + + # Check token usage + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + ) else: - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] - - # Check token usage - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + events = capture_events() + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai"): + config = create_test_config(temperature=0.7, max_output_tokens=100) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Tell me a joke", config=config + ) + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "google_genai" + + assert len(event["spans"]) == 1 + chat_span = event["spans"][0] + + # Check chat span + assert chat_span["op"] == OP.GEN_AI_CHAT + assert chat_span["description"] == "chat gemini-1.5-flash" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == 
"chat" + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + + if send_default_pii and include_prompts: + # Response text is stored as a JSON array + if stream_gen_ai_spans: + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + else: + response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + # Parse the JSON array + response_texts = json.loads(response_text) + assert response_texts == ["Hello! How can I help you today?"] + else: + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"] + + # Check token usage + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize("generate_content_config", (False, True)) @pytest.mark.parametrize( "system_instructions,expected_texts", @@ -209,25 +272,29 @@ def test_nonstreaming_generate_content( ) def test_generate_content_with_system_instruction( sentry_init, + capture_events, capture_items, mock_genai_client, generate_content_config, system_instructions, expected_texts, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with 
start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): config = { "system_instruction": system_instructions, "temperature": 0.5, @@ -242,28 +309,66 @@ def test_generate_content_with_system_instruction( config=config, ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") - if expected_texts is None: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] - return + if expected_texts is None: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] + return - # (PII is enabled and include_prompts is True in this test) - system_instructions = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + # (PII is enabled and include_prompts is True in this test) + system_instructions = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = { + "system_instruction": system_instructions, + "temperature": 0.5, + } + + if generate_content_config: + config = create_test_config(**config) + + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="What is 2+2?", + config=config, + ) + + (event,) = events + invoke_span = event["spans"][0] + + if expected_texts is None: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"] + return + + # (PII is enabled and include_prompts is True in this test) + system_instructions = json.loads( + invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) assert system_instructions == [ {"type": "text", "content": text} for text 
in expected_texts ] -def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_generate_content_with_tools( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -308,27 +413,45 @@ def get_weather(location: str) -> str: mock_http_response = create_mock_http_response(tool_response_json) + if stream_gen_ai_spans: + items = capture_items("span") + else: + events = capture_events() + with mock.patch.object( mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - config = create_test_config(tools=[get_weather, mock_tool]) - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="What's the weather?", config=config - ) + ), start_transaction(name="google_genai"): + config = create_test_config(tools=[get_weather, mock_tool]) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="What's the weather?", config=config + ) + + if stream_gen_ai_spans: + invoke_span = next(item.payload for item in items if item.type == "span") - invoke_span = next(item.payload for item in items if item.type == "span") + # Check that tools are recorded (data is serialized as a string) + tools_data_str = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + else: + (event,) = events + invoke_span = event["spans"][0] - # Check that tools are recorded (data is serialized as a string) - tools_data_str = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + # Check that tools are recorded (data is serialized as a string) + tools_data_str = 
invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 # The order of tools may not be guaranteed, so sort by name and description for comparison - sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) - ) + if stream_gen_ai_spans: + sorted_tools = sorted( + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) + ) + else: + sorted_tools = sorted( + tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) + ) # The function tool assert sorted_tools[0]["name"] == "get_weather" @@ -339,13 +462,19 @@ def get_weather(location: str) -> str: assert sorted_tools[1]["description"] == "Get weather information (tool object)" -def test_tool_execution(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_tool_execution( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -357,45 +486,79 @@ def get_weather(location: str) -> str: wrapped_weather = wrapped_tool(get_weather) + if stream_gen_ai_spans: + items = capture_items("span") + else: + events = capture_events() + # Execute the wrapped tool with start_transaction(name="test_tool"): result = wrapped_weather("San Francisco") assert result == "The weather in San Francisco is sunny" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - tool_span = next(item.payload for item in items if item.type == "span") - - assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL - assert tool_span["name"] == "execute_tool get_weather" - assert 
tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" - assert ( - tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] - == "Get the weather for a location" - ) + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + tool_span = next(item.payload for item in items if item.type == "span") + + assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["name"] == "execute_tool get_weather" + assert tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert ( + tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + == "Get the weather for a location" + ) + else: + (event,) = events + assert len(event["spans"]) == 1 + tool_span = event["spans"][0] + + assert tool_span["op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["description"] == "execute_tool get_weather" + assert tool_span["data"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert ( + tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + == "Get the weather for a location" + ) -def test_error_handling(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_error_handling( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction") + if stream_gen_ai_spans: + items = capture_items("event", "transaction") + else: + events = capture_events() # Mock an error at the HTTP level with mock.patch.object( mock_genai_client._api_client, "request", side_effect=Exception("API Error") + ), start_transaction(name="google_genai"), pytest.raises( + Exception, match="API Error" ): - with start_transaction(name="google_genai"): - with pytest.raises(Exception, match="API Error"): - 
mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents="This will fail", - config=create_test_config(), - ) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="This will fail", + config=create_test_config(), + ) - (error_event,) = (item.payload for item in items if item.type == "event") + if stream_gen_ai_spans: + (error_event,) = (item.payload for item in items if item.type == "event") + else: + # Should have both transaction and error events + assert len(events) == 2 + error_event, transaction_event = events assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -403,14 +566,21 @@ def test_error_handling(sentry_init, capture_items, mock_genai_client): assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" -def test_streaming_generate_content(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_streaming_generate_content( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test streaming with generate_content_stream, verifying chunk accumulation.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Create streaming chunks - simulating a multi-chunk response # Chunk 1: First part of text with partial usage metadata @@ -474,17 +644,21 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien stream_chunks = [chunk1_json, chunk2_json, chunk3_json] mock_stream = create_mock_streaming_responses(stream_chunks) + if stream_gen_ai_spans: + items = capture_items("span") + else: + events = capture_events() + with mock.patch.object( mock_genai_client._api_client, "request_streamed", return_value=mock_stream - ): - with 
start_transaction(name="google_genai"): - config = create_test_config() - stream = mock_genai_client.models.generate_content_stream( - model="gemini-1.5-flash", contents="Stream me a response", config=config - ) + ), start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", contents="Stream me a response", config=config + ) - # Consume the stream (this is what users do with the integration wrapper) - collected_chunks = list(stream) + # Consume the stream (this is what users do with the integration wrapper) + collected_chunks = list(stream) # Verify we got all chunks assert len(collected_chunks) == 3 @@ -492,68 +666,125 @@ def test_streaming_generate_content(sentry_init, capture_items, mock_genai_clien assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - chat_span = next(item.payload for item in items if item.type == "span") + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") - # Check that streaming flag is set on both spans - assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + # Check that streaming flag is set on both spans + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + (event,) = events + + assert len(event["spans"]) == 1 + chat_span = event["spans"][0] + + # Check that streaming flag is set on both spans + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" 
# Response text is stored as a JSON string - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) + if stream_gen_ai_spans: + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + else: + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) # When there's a single finish reason, it's stored as a plain string (not JSON) - assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] - assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + ) + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + ) + + # Verify model name + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + ) + else: + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + assert 
chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 - # Verify model name - assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + # Verify model name + assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" -def test_span_origin(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + else: + events = capture_events() + with mock.patch.object( mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - config = create_test_config() - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="Test origin", config=config - ) + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test origin", config=config + ) + + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + spans = [item.payload 
for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + assert event["contexts"]["trace"]["origin"] == "manual" + for span in event["spans"]: + assert span["origin"] == "auto.ai.google_genai" -def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_response_without_usage_metadata( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling of responses without usage metadata""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response without usage metadata response_json = { @@ -570,31 +801,58 @@ def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_ mock_http_response = create_mock_http_response(response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test", config=config + ) + + chat_span = next(item.payload for item in items if item.type == "span") + + # Usage data should not be present + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] + assert 
SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): config = create_test_config() mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Test", config=config ) - chat_span = next(item.payload for item in items if item.type == "span") + (event,) = events + chat_span = event["spans"][0] - # Usage data should not be present - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] - assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] - assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] + # Usage data should not be present + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["data"] -def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_multiple_candidates( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling of multiple response candidates""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response with multiple candidates multi_candidate_json = { @@ -623,20 +881,38 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): mock_http_response = create_mock_http_response(multi_candidate_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + 
+ with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Generate multiple", config=config + ) + + chat_span = next(item.payload for item in items if item.type == "span") + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): config = create_test_config() mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Generate multiple", config=config ) - chat_span = next(item.payload for item in items if item.type == "span") + (event,) = events + chat_span = event["spans"][0] # Should capture all responses # Response text is stored as a JSON string when there are multiple responses - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + if stream_gen_ai_spans: + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + else: + response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] if isinstance(response_text, str) and response_text.startswith("["): # It's a JSON array response_list = json.loads(response_text) @@ -646,26 +922,73 @@ def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): assert response_text == "Response 1\nResponse 2" # Finish reasons are serialized as JSON - finish_reasons = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] - ) + if stream_gen_ai_spans: + finish_reasons = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + ) + else: + finish_reasons = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + ) assert finish_reasons == ["STOP", "MAX_TOKENS"] -def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_client): 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_all_configuration_parameters( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test that all configuration parameters are properly recorded""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config( + temperature=0.8, + top_p=0.95, + top_k=40, + max_output_tokens=2048, + presence_penalty=0.1, + frequency_penalty=0.2, + seed=12345, + ) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test all params", config=config + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + # Check all parameters are recorded + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + ) + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), 
start_transaction(name="google_genai"): config = create_test_config( temperature=0.8, top_p=0.95, @@ -679,56 +1002,88 @@ def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_cli model="gemini-1.5-flash", contents="Test all params", config=config ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] - # Check all parameters are recorded - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + # Check all parameters are recorded + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 -def test_empty_response(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_empty_response( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling of minimal response with no content""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = 
capture_items("span") # Minimal response with empty candidates array minimal_response_json = {"candidates": []} mock_http_response = create_mock_http_response(minimal_response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + response = mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test", config=create_test_config() + ) + + # Response will have an empty candidates list + assert response is not None + assert len(response.candidates) == 0 + + # Should still create spans even with empty candidates + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): response = mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - # Response will have an empty candidates list - assert response is not None - assert len(response.candidates) == 0 + # Response will have an empty candidates list + assert response is not None + assert len(response.candidates) == 0 - # Should still create spans even with empty candidates - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 + (event,) = events + # Should still create spans even with empty candidates + assert len(event["spans"]) == 1 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_response_with_different_id_fields( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, 
): """Test handling of different response ID field names""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response with response_id and model_version response_json = { @@ -747,21 +1102,40 @@ def test_response_with_different_id_fields( mock_http_response = create_mock_http_response(response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - chat_span = next(item.payload for item in items if item.type == "span") + chat_span = next(item.payload for item in items if item.type == "span") - assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" - assert ( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] - == "gemini-1.5-flash-001" - ) + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] + == "gemini-1.5-flash-001" + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test", config=create_test_config() + ) + + (event,) = events + chat_span = event["spans"][0] + + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gemini-1.5-flash-001" + ) def test_tool_with_async_function(sentry_init): @@ -785,40 +1159,72 
@@ async def async_tool(param: str) -> str: assert hasattr(wrapped_async_tool, "__wrapped__") # Should preserve original -def test_contents_as_none(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_contents_as_none( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test handling when contents parameter is None""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=None, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + # Should handle None contents gracefully + messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=None, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + # Should handle None contents gracefully + messages = invoke_span["data"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) - # Should handle None contents 
gracefully - messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) # Should only have system message if any, not user message assert all(msg["role"] != "user" or msg["content"] is not None for msg in messages) -def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_tool_calls_extraction( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test extraction of tool/function calls from response""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response with function calls function_call_response_json = { @@ -857,27 +1263,49 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): mock_http_response = create_mock_http_response(function_call_response_json) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="What's the weather and time?", config=create_test_config(), ) - chat_span = next( - item.payload for item in items if item.type == "span" - ) # The chat span + chat_span = next( + item.payload for item in items if item.type == "span" + ) # The chat span - # Check that tool calls are extracted and stored - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] + # Check that tool calls are extracted and stored + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] - # Parse the JSON string to verify content - tool_calls = json.loads( - 
chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - ) + # Parse the JSON string to verify content + tool_calls = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="What's the weather and time?", + config=create_test_config(), + ) + + (event,) = events + chat_span = event["spans"][0] # The chat span + + # Check that tool calls are extracted and stored + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["data"] + + # Parse the JSON string to verify content + tool_calls = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]) assert len(tool_calls) == 2 @@ -897,14 +1325,21 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_google_genai_message_truncation( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -913,26 +1348,45 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert parsed_messages[0]["role"] == "user" + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents=[large_content, small_content], + config=create_test_config(), + ) + + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert 
isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert parsed_messages[0]["role"] == "user" # What "small content" becomes because the large message used the entire character limit assert "..." in parsed_messages[0]["content"][1]["text"] @@ -962,6 +1416,7 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ } +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -972,24 +1427,32 @@ def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_ ], ) def test_embed_content( - sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, - "request", - return_value=mock_http_response, - ): - with start_transaction(name="google_genai_embeddings"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents=[ @@ -998,49 +1461,136 @@ def test_embed_content( ], ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "google_genai_embeddings" + (event,) = (item.payload for item in items if item.type == "transaction") - # Should 
have 1 span for embeddings - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (embed_span,) = spans + assert event["transaction"] == "google_genai_embeddings" - # Check embeddings span - assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["name"] == "embeddings text-embedding-004" - assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" - ) + # Should have 1 span for embeddings + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans - # Check input texts if PII is allowed - if send_default_pii and include_prompts: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Check embeddings span + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-004" ) - assert input_texts == [ - "What is your name?", - "What is your favorite color?", - ] + + # Check input texts if PII is allowed + if send_default_pii and include_prompts: + if stream_gen_ai_spans: + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + else: + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in 
embed_span["data"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + ) + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + events = capture_events() - # Check usage data (sum of token counts from statistics: 10 + 15 = 25) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + with mock.patch.object( + mock_genai_client._api_client, + "request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=[ + "What is your name?", + "What is your favorite color?", + ], + ) + assert len(events) == 1 + (event,) = events -def test_embed_content_string_input(sentry_init, capture_items, mock_genai_client): + assert event["type"] == "transaction" + + assert event["transaction"] == "google_genai_embeddings" + + # Should have 1 span for embeddings + assert len(event["spans"]) == 1 + (embed_span,) = event["spans"] + + # Check embeddings span + assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["description"] == "embeddings text-embedding-004" + assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + + # Check input texts if PII is allowed + if send_default_pii and 
include_prompts: + if stream_gen_ai_spans: + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + else: + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + ) + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_embed_content_string_input( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1059,49 +1609,112 @@ def test_embed_content_string_input(sentry_init, capture_items, mock_genai_clien } mock_http_response = create_mock_http_response(single_embed_response) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", 
return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents="Single text input", ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans - # Check that single string is handled correctly - input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] - # Should use token_count from statistics (5), not billable_character_count (10) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + # Check that single string is handled correctly + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents="Single text input", + ) + (event,) = events + (embed_span,) = event["spans"] -def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_client): + # Check that single string is handled 
correctly + input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_embed_content_error_handling( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test error handling in embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "event") + if stream_gen_ai_spans: + items = capture_items("transaction", "event") + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "request", + side_effect=Exception("Embedding API Error"), + ), start_transaction(name="google_genai_embeddings"), pytest.raises( + Exception, match="Embedding API Error" + ): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) - # Mock an error at the HTTP level - with mock.patch.object( - mock_genai_client._api_client, - "request", - side_effect=Exception("Embedding API Error"), - ): - with start_transaction(name="google_genai_embeddings"): - with pytest.raises(Exception, match="Embedding API Error"): - mock_genai_client.models.embed_content( - model="text-embedding-004", - contents=["This will fail"], - ) + (error_event,) = (item.payload for item in items if item.type == "event") + else: + events = 
capture_events() + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "request", + side_effect=Exception("Embedding API Error"), + ), start_transaction(name="google_genai_embeddings"), pytest.raises( + Exception, match="Embedding API Error" + ): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) - (error_event,) = (item.payload for item in items if item.type == "event") + # Should have both transaction and error events + assert len(events) == 2 + error_event, _ = events assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1109,15 +1722,20 @@ def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_cli assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embed_content_without_statistics( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1133,50 +1751,92 @@ def test_embed_content_without_statistics( } mock_http_response = create_mock_http_response(old_version_response) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), 
start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents=["Test without statistics", "Another test"], ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans - # No usage tokens since there are no statistics in older versions - # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["Test without statistics", "Another test"], + ) + + (event,) = events + (embed_span,) = event["spans"] + + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] -def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_embed_content_span_origin( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test that embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) + if 
stream_gen_ai_spans: + items = capture_items("transaction", "span") + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): + mock_genai_client.models.embed_content( + model="text-embedding-004", + contents=["Test origin"], + ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings"): + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + else: + events = capture_events() + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai_embeddings"): mock_genai_client.models.embed_content( model="text-embedding-004", contents=["Test origin"], ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + assert event["contexts"]["trace"]["origin"] == "manual" + for span in event["spans"]: + assert span["origin"] == "auto.ai.google_genai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1188,25 +1848,33 @@ def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client ], ) async def test_async_embed_content( - sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + 
mock_genai_client, + stream_gen_ai_spans, ): """Test async embed_content method.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock the async HTTP response mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, - "async_request", - return_value=mock_http_response, - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( model="text-embedding-004", contents=[ @@ -1215,52 +1883,109 @@ async def test_async_embed_content( ], ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "google_genai_embeddings_async" + (event,) = (item.payload for item in items if item.type == "transaction") - # Should have 1 span for embeddings - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 - (embed_span,) = spans + assert event["transaction"] == "google_genai_embeddings_async" - # Check embeddings span - assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["name"] == "embeddings text-embedding-004" - assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" - ) + # Should have 1 span for embeddings + spans = [item.payload for item in items if item.type == "span"] + assert 
len(spans) == 1 + (embed_span,) = spans - # Check input texts if PII is allowed - if send_default_pii and include_prompts: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Check embeddings span + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-004" ) - assert input_texts == [ - "What is your name?", - "What is your favorite color?", - ] + + # Check input texts if PII is allowed + if send_default_pii and include_prompts: + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] + events = capture_events() - # Check usage data (sum of token counts from statistics: 10 + 15 = 25) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + 
model="text-embedding-004", + contents=[ + "What is your name?", + "What is your favorite color?", + ], + ) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + + assert event["transaction"] == "google_genai_embeddings_async" + # Should have 1 span for embeddings + assert len(event["spans"]) == 1 + (embed_span,) = event["spans"] + # Check embeddings span + assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["description"] == "embeddings text-embedding-004" + assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + + # Check input texts if PII is allowed + if send_default_pii and include_prompts: + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + assert input_texts == [ + "What is your name?", + "What is your favorite color?", + ] + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + + # Check usage data (sum of token counts from statistics: 10 + 15 = 25) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_string_input( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test async embed_content with a single string instead of list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ 
-1279,52 +2004,108 @@ async def test_async_embed_content_string_input( } mock_http_response = create_mock_http_response(single_embed_response) - with mock.patch.object( - mock_genai_client._api_client, "async_request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents="Single text input", + ) + + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans + + # Check that single string is handled correctly + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( model="text-embedding-004", contents="Single text input", ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + (event,) = events + (embed_span,) = event["spans"] + + # Check that single string is handled correctly + input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - # Check that single string is handled correctly - input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if 
stream_gen_ai_spans: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + else: + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_error_handling( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test error handling in async embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "event") - # Mock an error at the HTTP level - with mock.patch.object( - mock_genai_client._api_client, - "async_request", - side_effect=Exception("Async Embedding API Error"), - ): - with start_transaction(name="google_genai_embeddings_async"): - with pytest.raises(Exception, match="Async Embedding API Error"): - await mock_genai_client.aio.models.embed_content( - model="text-embedding-004", - contents=["This will fail"], - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "event") + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + side_effect=Exception("Async Embedding API Error"), + ), start_transaction(name="google_genai_embeddings_async"), pytest.raises( + Exception, match="Async Embedding API Error" + ): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) + + (error_event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + side_effect=Exception("Async 
Embedding API Error"), + ), start_transaction(name="google_genai_embeddings_async"), pytest.raises( + Exception, match="Async Embedding API Error" + ): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["This will fail"], + ) - (error_event,) = (item.payload for item in items if item.type == "event") + # Should have both transaction and error events + assert len(events) == 2 + error_event, _ = events assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1332,16 +2113,21 @@ async def test_async_embed_content_error_handling( assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_without_statistics( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test async embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1357,64 +2143,118 @@ async def test_async_embed_content_without_statistics( } mock_http_response = create_mock_http_response(old_version_response) - with mock.patch.object( - mock_genai_client._api_client, "async_request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( 
model="text-embedding-004", contents=["Test without statistics", "Another test"], ) - spans = [item.payload for item in items if item.type == "span"] - (embed_span,) = spans + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["Test without statistics", "Another test"], + ) + + (event,) = events + (embed_span,) = event["spans"] # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + if stream_gen_ai_spans: + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] + else: + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_async_embed_content_span_origin( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test that async embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) - with mock.patch.object( - mock_genai_client._api_client, "async_request", return_value=mock_http_response - ): - with start_transaction(name="google_genai_embeddings_async"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + 
return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): + await mock_genai_client.aio.models.embed_content( + model="text-embedding-004", + contents=["Test origin"], + ) + + (event,) = [item.payload for item in items if item.type == "transaction"] + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, + "async_request", + return_value=mock_http_response, + ), start_transaction(name="google_genai_embeddings_async"): await mock_genai_client.aio.models.embed_content( model="text-embedding-004", contents=["Test origin"], ) - (event,) = [item.payload for item in items if item.type == "transaction"] - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" + assert event["contexts"]["trace"]["origin"] == "manual" + for span in event["spans"]: + assert span["origin"] == "auto.ai.google_genai" # Integration tests for generate_content with different input message formats +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_content_object( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with Content object input.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1423,17 +2263,36 @@ def 
test_generate_content_with_content_object( role="user", parts=[genai_types.Part(text="Hello from Content object")] ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=content, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1441,33 +2300,57 @@ def test_generate_content_with_content_object( ] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_dict_format( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with dict format input (ContentDict).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) # Dict format content contents = {"role": "user", "parts": [{"text": "Hello from dict format"}]} - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1475,14 +2358,21 @@ def test_generate_content_with_dict_format( ] -def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_generate_content_with_file_data( + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, +): """Test 
generate_content with file_data (external file reference).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1498,17 +2388,36 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ ], ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=content, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1522,16 +2431,21 @@ def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_ assert messages[0]["content"][1]["uri"] == "gs://bucket/image.jpg" 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_inline_data( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with inline_data (binary data).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1546,17 +2460,36 @@ def test_generate_content_with_inline_data( ], ) - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=content, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert 
len(messages[0]["content"]) == 2 @@ -1567,16 +2500,21 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1602,17 +2540,36 @@ def test_generate_content_with_function_response( ), ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = 
json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -1621,16 +2578,21 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1647,57 +2609,105 @@ def test_generate_content_with_mixed_string_and_content( ), ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( 
model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Tell me a joke", "type": "text"}] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_part_object_directly( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with Part object directly (not wrapped in Content).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) # Part object directly part = genai_types.Part(text="Direct Part object") - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=part, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", 
return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=part, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """ Test generate_content with list of dict format inputs. @@ -1710,8 +2720,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1722,32 +2732,56 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + 
messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_dict_inline_data( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): """Test generate_content with dict format containing inline_data.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1760,17 +2794,36 @@ def test_generate_content_with_dict_inline_data( ], } - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, 
config=create_test_config() + ) + + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1783,15 +2836,20 @@ def test_generate_content_with_dict_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_without_parts_property_inline_data( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1800,17 +2858,35 @@ def test_generate_content_without_parts_property_inline_data( {"inline_data": {"data": b"fake_binary_data", "mime_type": "image/gif"}}, ] - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with 
mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 @@ -1826,15 +2902,20 @@ def test_generate_content_without_parts_property_inline_data( assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/gif" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_items, mock_genai_client + sentry_init, + capture_events, + capture_items, + mock_genai_client, + stream_gen_ai_spans, ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1848,17 +2929,36 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit }, ] - with mock.patch.object( - mock_genai_client._api_client, "request", 
return_value=mock_http_response - ): - with start_transaction(name="google_genai"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") + invoke_span = next(item.payload for item in items if item.type == "span") + + messages = json.loads( + invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 3c79ca7262..d691a58c31 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -17,7 +17,7 @@ if TYPE_CHECKING: - from typing import Any + pass HF_VERSION = package_version("huggingface-hub") @@ -466,173 +466,327 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): yield rsps +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, 
False]) def test_text_generation( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_text_generation_api: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_text_generation_api, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = InferenceClient(model="test-model") - with sentry_sdk.start_transaction(name="test"): - client.text_generation( - "Hello", - stream=False, - details=True, - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" - assert span["name"] == "text_completion test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": False, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - 
"thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = "Hello" - expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" - - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - - assert span["attributes"] == expected_data - - # text generation does not set the response model - assert "gen_ai.response.model" not in span["attributes"] + with sentry_sdk.start_transaction(name="test"): + client.text_generation( + "Hello", + stream=False, + details=True, + ) + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + 
expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can i help you?" + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["attributes"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["attributes"] + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + client.text_generation( + "Hello", + stream=False, + details=True, + ) + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.text_completion" + assert span["description"] == "text_completion test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can i help you?" 
+ ) + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_text_generation_api_streaming: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_text_generation_api_streaming, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = InferenceClient(model="test-model") - with sentry_sdk.start_transaction(name="test"): - for _ in client.text_generation( - prompt="Hello", - stream=True, - details=True, - ): - pass - - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there 
is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + if stream_gen_ai_spans: + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" - assert span["name"] == "text_completion test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - 
"thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = "Hello" - expected_data["gen_ai.response.text"] = "the mocked model response" - - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - - assert span["attributes"] == expected_data - - # text generation does not set the response model - assert "gen_ai.response.model" not in span["attributes"] + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["attributes"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["attributes"] + else: + events = capture_events() + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.text_completion" + assert span["description"] == "text_completion test-model" + assert 
span["origin"] == "auto.ai.huggingface_hub" + + if stream_gen_ai_spans: + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api, + stream_gen_ai_spans, +): sentry_init( 
traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() client = get_hf_provider_inference_client() @@ -642,247 +796,451 @@ def test_chat_completion( stream=False, ) - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, 
+ } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can I help you today?" + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["attributes"] == expected_data + else: + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + if stream_gen_ai_spans: + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "stop", - "gen_ai.response.model": "test-model-123", - "gen_ai.response.streaming": False, - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 8, - "gen_ai.usage.total_tokens": 18, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": 
mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' - ) - expected_data["gen_ai.response.text"] = ( - "[mocked] Hello! How can I help you today?" - ) + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can I help you today?" 
+ ) - assert span["attributes"] == expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api_streaming: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = get_hf_provider_inference_client() - with sentry_sdk.start_transaction(name="test"): - _ = list( - client.chat_completion( - [{"role": "user", "content": "Hello!"}], - stream=True, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + [{"role": "user", "content": "Hello!"}], + stream=True, + ) ) - ) - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert 
span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "stop", - "gen_ai.response.model": "test-model-123", - "gen_ai.response.streaming": True, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - # usage is not available in older versions of the library - if HF_VERSION and HF_VERSION >= (0, 26, 0): - expected_data["gen_ai.usage.input_tokens"] = 183 - expected_data["gen_ai.usage.output_tokens"] = 14 - expected_data["gen_ai.usage.total_tokens"] = 197 - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' - ) - expected_data["gen_ai.response.text"] = "the mocked model response" + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": 
"test-model-123", + "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + # usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = "the mocked model response" - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data - assert span["attributes"] == expected_data + assert span["attributes"] == expected_data + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + [{"role": "user", "content": "Hello!"}], + stream=True, + ) + ) + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + 
expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + # usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = "the mocked model response" + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" -) -> None: - sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) - items = capture_items("event", "transaction", "span") + sentry_init, + capture_events, + capture_items, + mock_hf_api_with_errors, + stream_gen_ai_spans, +): + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) client = get_hf_provider_inference_client() + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - with sentry_sdk.start_transaction(name="test"): - with pytest.raises(HfHubHTTPError): + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): client.chat_completion( messages=[{"role": "user", "content": "Hello!"}], ) - (error,) = (item.payload for item in items if item.type == "event") - assert 
error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" - assert not error["exception"]["values"][0]["mechanism"]["handled"] + (error,) = (item.payload for item in items if item.type == "event") - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] - assert span is not None + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - assert span["status"] == "error" + assert span is not None - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - error["contexts"]["trace"]["trace_id"] - == transaction["contexts"]["trace"]["trace_id"] - ) - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.model": "test-model", - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - 
"sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - assert span["attributes"] == expected_data + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "error" + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["attributes"] == expected_data + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + ( + error, + transaction, + ) = events + + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "internal_error" + assert span.get("tags", {}).get("status") == 
"internal_error" + + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["data"] == expected_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" -) -> None: - sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) - items = capture_items("event", "transaction", "span") - + sentry_init, + capture_events, + capture_items, + mock_hf_api_with_errors, + stream_gen_ai_spans, +): client = get_hf_provider_inference_client() - with sentry_sdk.start_transaction(name="test"): - with pytest.raises(HfHubHTTPError): + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): client.chat_completion( messages=[{"role": "user", "content": "Hello!"}], ) - (error,) = [item.payload for item in items if item.type == "event"] - assert error["level"] == "error" + (error,) = [item.payload for item in items if item.type == "event"] + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + assert span["status"] == "error" + else: + events = 
capture_events() - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" + with sentry_sdk.start_transaction(name="test"), pytest.raises(HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + (error, transaction) = events + assert error["level"] == "error" + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" - assert span is not None - assert span["status"] == "error" + assert span is not None + assert span["status"] == "internal_error" + assert span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api_tools: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_tools, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = 
capture_items("transaction", "span") client = get_hf_provider_inference_client() @@ -901,83 +1259,148 @@ def test_chat_completion_with_tools( } ] - with sentry_sdk.start_transaction(name="test"): - client.chat_completion( - messages=[{"role": "user", "content": "What is the weather in Paris?"}], - tools=tools, - tool_choice="auto", - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "tool_calls", - "gen_ai.response.model": "test-model-123", - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 8, - "gen_ai.usage.total_tokens": 18, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] 
= ( - '[{"role": "user", "content": "What is the weather in Paris?"}]' - ) - expected_data["gen_ai.response.tool_calls"] = ( - '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' - ) + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and 
include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["attributes"] == expected_data + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + 
expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - assert "gen_ai.response.tool_calls" not in expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data - assert span["attributes"] == expected_data + assert span["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming_with_tools( - sentry_init: "Any", - capture_items: "Any", - send_default_pii: "Any", - include_prompts: "Any", - mock_hf_chat_completion_api_streaming_tools: "Any", -) -> None: + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming_tools, + stream_gen_ai_spans, +): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() client = get_hf_provider_inference_client() @@ -1006,59 +1429,110 @@ def test_chat_completion_streaming_with_tools( ) ) - spans = [item.payload for item in items if item.type == 
"span"] - span = None - for sp in spans: - if sp["attributes"]["sentry.op"].startswith("gen_ai"): - assert span is None, "there is exactly one gen_ai span" - span = sp - else: - # there should be no other spans, just the gen_ai span - # and optionally some http.client spans from talking to the hf api - assert sp["attributes"]["sentry.op"] == "http.client" - - assert span is not None - - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - - expected_data = { - "gen_ai.operation.name": "chat", - "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "tool_calls", - "gen_ai.response.model": "test-model-123", - "gen_ai.response.streaming": True, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - if HF_VERSION and HF_VERSION >= (0, 26, 0): - expected_data["gen_ai.usage.input_tokens"] = 183 - expected_data["gen_ai.usage.output_tokens"] = 14 - expected_data["gen_ai.usage.total_tokens"] = 197 - - if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "What is the weather in Paris?"}]' - ) - expected_data["gen_ai.response.text"] = "response with tool calls follows" - expected_data["gen_ai.response.tool_calls"] = ( - '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": 
"None"}]' - ) + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + span = None + for sp in spans: + if sp["attributes"]["sentry.op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["attributes"]["sentry.op"] == "http.client" + + assert span is not None + + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.text"] = "response with tool calls follows" + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": 
{"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": "None"}]' + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["attributes"] == expected_data + else: + (transaction,) = events + + span = None + for sp in transaction["spans"]: + if sp["op"].startswith("gen_ai"): + assert span is None, "there is exactly one gen_ai span" + span = sp + else: + # there should be no other spans, just the gen_ai span + # and optionally some http.client spans from talking to the hf api + assert sp["op"] == "http.client" + + assert span is not None + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.text"] = "response with tool calls follows" + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": 
"get_weather"}, "id": "call_123", "type": "function", "index": "None"}]' + ) - if not send_default_pii or not include_prompts: - assert "gen_ai.request.messages" not in expected_data - assert "gen_ai.response.text" not in expected_data - assert "gen_ai.response.tool_calls" not in expected_data + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data - assert span["attributes"] == expected_data + assert span["data"] == expected_data diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 5c700180cd..7adb2d13c5 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -257,10 +257,13 @@ def _llm_type(self) -> str: return llm_type +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_text_completion( sentry_init, + capture_events, capture_items, get_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -270,8 +273,8 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") model_response = get_model_response( Completion( @@ -302,45 +305,87 @@ def test_langchain_text_completion( openai_api_key="badkey", ) - with patch.object( - model.client._client._client, - "send", - return_value=model_response, - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + model.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): input_text = "What is the capital of France?" 
model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" + tx = next(item.payload for item in items if item.type == "transaction") + assert tx["type"] == "transaction" - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] - llm_span = llm_spans[0] - assert llm_span["name"] == "text_completion gpt-3.5-turbo" - assert llm_span["attributes"]["gen_ai.system"] == "openai" - assert llm_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" - assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" - assert ( - llm_span["attributes"]["gen_ai.response.text"] - == "The capital of France is Paris." - ) - assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 - assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["name"] == "text_completion gpt-3.5-turbo" + assert llm_span["attributes"]["gen_ai.system"] == "openai" + assert llm_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert llm_span["attributes"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["attributes"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["attributes"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["attributes"]["gen_ai.usage.output_tokens"] == 15 + else: + events = capture_events() + + with patch.object( + model.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): + input_text = "What is the capital of France?" + model.invoke(input_text, config={"run_name": "my-snazzy-pipeline"}) + + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 + llm_span = llm_spans[0] + + assert llm_span["description"] == "text_completion gpt-3.5-turbo" + assert llm_span["data"]["gen_ai.system"] == "openai" + assert llm_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo" + assert ( + llm_span["data"]["gen_ai.response.text"] + == "The capital of France is Paris." 
+ ) + assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25 + assert llm_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_chat_with_run_name( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -350,8 +395,8 @@ def test_langchain_chat_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") request_headers = {} # Changed in https://github.com/langchain-ai/langchain/pull/32655 @@ -380,28 +425,56 @@ def test_langchain_chat_with_run_name( openai_api_key="badkey", ) - with patch.object( - llm.client._client._client, - "send", - return_value=model_response, - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("span") + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): llm.invoke( "How many letters in the word eudca", config={"run_name": "my-snazzy-pipeline"}, ) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - assert len(chat_spans) == 1 - assert ( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" - ) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + assert len(chat_spans) == 1 + assert ( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] + == "my-snazzy-pipeline" + ) + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): + llm.invoke( + "How 
many letters in the word eudca", + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert ( + chat_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_tool_call_with_run_name( sentry_init, + capture_events, capture_items, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -411,25 +484,44 @@ def test_langchain_tool_call_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(): + get_word_length.invoke( + {"word": "eudca"}, + config={"run_name": "my-snazzy-pipeline"}, + ) - with start_transaction(): - get_word_length.invoke( - {"word": "eudca"}, - config={"run_name": "my-snazzy-pipeline"}, + spans = [item.payload for item in items if item.type == "span"] + tool_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) + assert len(tool_spans) == 1 + assert ( + tool_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] + == "my-snazzy-pipeline" ) + else: + events = capture_events() - spans = [item.payload for item in items if item.type == "span"] - tool_spans = list( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) - assert len(tool_spans) == 1 - assert ( - tool_spans[0]["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" - ) + with start_transaction(): + get_word_length.invoke( + {"word": "eudca"}, + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + tool_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool") + assert len(tool_spans) == 1 + assert ( + tool_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" + ) 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", @@ -456,6 +548,7 @@ def test_langchain_tool_call_with_run_name( ) def test_langchain_create_agent( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, @@ -463,6 +556,7 @@ def test_langchain_create_agent( request, get_model_response, nonstreaming_responses_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -472,8 +566,8 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") model_response = get_model_response( nonstreaming_responses_model_response, @@ -496,12 +590,14 @@ def test_langchain_create_agent( name="word_length_agent", ) - with patch.object( - llm.client._client._client, - "send", - return_value=model_response, - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): agent.invoke( { "messages": [ @@ -510,61 +606,135 @@ def test_langchain_create_agent( }, ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - assert len(chat_spans) == 1 - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" - assert 
chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + assert len(chat_spans) == 1 + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" - if send_default_pii and include_prompts: - assert ( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == "Hello, how can I help you?" - ) + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 30 - param_id = request.node.callspec.id - if "string" in param_id: - assert [ - { - "type": "text", - "content": "You are very powerful assistant, but don't know current events", - } - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + if send_default_pii and include_prompts: + assert ( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == "Hello, how can I help you?" 
) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) else: - assert [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get( + "attributes", {} + ) + + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _, start_transaction(): + agent.invoke( { - "type": "text", - "content": "Be concise and clear.", + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], }, - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) - else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert chat_spans[0]["origin"] == "auto.ai.langchain" + + assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["data"]["gen_ai.agent.name"] == "word_length_agent" + + assert 
chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + + if send_default_pii and include_prompts: + assert ( + chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == "Hello, how can I help you?" + ) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "data", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", @@ -580,11 +750,13 @@ def test_langchain_create_agent( ) def test_tool_execution_span( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -594,8 +766,8 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") responses = responses_tool_call_model_responses( tool_name="get_word_length", @@ -657,12 +829,14 @@ def test_tool_execution_span( name="word_length_agent", ) - with patch.object( - llm.client._client._client, - "send", - 
side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): agent.invoke( { "messages": [ @@ -671,86 +845,190 @@ def test_tool_execution_span( }, ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - assert len(chat_spans) == 2 + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - tool_exec_spans = list( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) - assert len(tool_exec_spans) == 1 - tool_exec_span = tool_exec_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + assert len(chat_spans) == 2 - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + tool_exec_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" - assert chat_spans[1]["attributes"]["gen_ai.agent.name"] == "word_length_agent" - assert tool_exec_span["attributes"]["gen_ai.agent.name"] == "word_length_agent" + assert len(tool_exec_spans) == 1 + tool_exec_span = tool_exec_spans[0] - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - 
assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" + assert chat_spans[0]["attributes"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[1]["attributes"]["gen_ai.agent.name"] == "word_length_agent" + assert tool_exec_span["attributes"]["gen_ai.agent.name"] == "word_length_agent" - if send_default_pii and include_prompts: - assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["attributes"]["gen_ai.system"] == "openai-chat" + + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["attributes"]["gen_ai.system"] == "openai-chat" - assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + if send_default_pii and include_prompts: + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( - 
"attributes", {} - ), ( - "Tool calls should be recorded when send_default_pii=True and include_prompts=True" - ) - tool_calls_data = chat_spans[0]["attributes"][ - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS - ] - assert isinstance(tool_calls_data, str) - assert "get_word_length" in tool_calls_data + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + assert isinstance(tool_calls_data, str) + assert "get_word_length" in tool_calls_data + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get( + "attributes", {} + ) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + 
SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert "get_word_length" in tools_data else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) + events = capture_events() - # Verify tool calls are NOT recorded when PII is disabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + agent.invoke( + { + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], + }, + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 2 + + tool_exec_spans = list( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert "get_word_length" in tools_data + assert len(tool_exec_spans) == 1 + tool_exec_span = tool_exec_spans[0] + + 
assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[1]["data"]["gen_ai.agent.name"] == "word_length_agent" + assert tool_exec_span["data"]["gen_ai.agent.name"] == "word_length_agent" + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat" + + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + assert chat_spans[1]["data"]["gen_ai.system"] == "openai-chat" + + if send_default_pii and include_prompts: + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "data", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, str) + assert "get_word_length" in tool_calls_data + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool 
calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -761,12 +1039,14 @@ def test_tool_execution_span( ) def test_langchain_openai_tools_agent_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -776,8 +1056,8 @@ def test_langchain_openai_tools_agent_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -815,12 +1095,14 @@ def test_langchain_openai_tools_agent_no_prompts( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( 
agent_executor.invoke( {"input": "How many letters in the word eudca"}, @@ -828,89 +1110,193 @@ def test_langchain_openai_tools_agent_no_prompts( ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert len(chat_spans) == 2 + assert len(chat_spans) == 2 - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + 
assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) - # We can't guarantee anything about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in 
chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) - # Verify tool calls are NOT recorded when PII is disabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and 
include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in tx["spans"] if x["op"] == 
"gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) - assert "get_word_length" in tools_data + assert len(chat_spans) == 2 + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert 
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "system_instructions_content", [ @@ -925,12 +1311,14 @@ def test_langchain_openai_tools_agent_no_prompts( ) def test_langchain_openai_tools_agent( sentry_init, + capture_events, capture_items, system_instructions_content, request, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -940,8 +1328,8 @@ def test_langchain_openai_tools_agent( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -979,117 +1367,235 @@ def test_langchain_openai_tools_agent( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, 
final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert len(chat_spans) == 2 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert len(chat_spans) == 2 - # We can't guarantee anything about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + assert 
invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert 
chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - param_id = request.node.callspec.id - if "string" in param_id: - assert [ - { - "type": "text", - "content": "You are very powerful assistant, but don't know current events", - } - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert 
chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["function_call"] + assert chat_spans[1]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data else: - assert [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + events = capture_events() - assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) - # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("attributes", {}), ( - "Tool calls should be recorded when send_default_pii=True and include_prompts=True" - ) - tool_calls_data = chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - assert isinstance(tool_calls_data, (list, str)) # Could be serialized - if isinstance(tool_calls_data, str): - assert "get_word_length" in tool_calls_data - elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: - # Check if tool calls contain expected function name - tool_call_str = str(tool_calls_data) - assert "get_word_length" in tool_call_str + tx = events[0] - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert 
chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" ) - assert "get_word_length" in tools_data + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" + ) + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 
== int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "data", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def 
test_langchain_openai_tools_agent_with_config( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1099,8 +1605,8 @@ def test_langchain_openai_tools_agent_with_config( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1140,29 +1646,59 @@ def test_langchain_openai_tools_agent_with_config( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.invoke( {"input": "How many letters in the word eudca"}, ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) + else: + events = 
capture_events() + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.invoke( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -1173,12 +1709,14 @@ def test_langchain_openai_tools_agent_with_config( ) def test_langchain_openai_tools_agent_stream_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1188,8 +1726,8 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1227,12 +1765,14 @@ def test_langchain_openai_tools_agent_stream_no_prompts( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.stream( {"input": "How many letters in the word eudca"}, @@ -1240,90 +1780,195 @@ def 
test_langchain_openai_tools_agent_stream_no_prompts( ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert len(chat_spans) == 2 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert len(chat_spans) == 2 - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - spans = [item.payload for item in items if item.type == "span"] - # We can't guarantee anything 
about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + spans = [item.payload for item in items if item.type == "span"] + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( - "attributes", {} - ) - assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) - assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - # Verify tool calls are NOT recorded when PII is disabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( - "attributes", {} - ), ( - f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " - f"and include_prompts={include_prompts}" - ) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("attributes", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("attributes", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("attributes", {}) - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert 
chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "attributes", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == 
"gen_ai.invoke_agent" ) - assert "get_word_length" in tools_data + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" + ) + + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 + + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {}) + assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {}) + assert SPANDATA.GEN_AI_TOOL_OUTPUT not in 
tool_exec_span.get("data", {}) + + # Verify tool calls are NOT recorded when PII is disabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get( + "data", {} + ), ( + f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} " + f"and include_prompts={include_prompts}" + ) + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "system_instructions_content", [ @@ -1338,12 +1983,14 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ) def test_langchain_openai_tools_agent_stream( sentry_init, + capture_events, capture_items, system_instructions_content, request, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1353,8 +2000,8 @@ def test_langchain_openai_tools_agent_stream( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1392,12 +2039,14 @@ def test_langchain_openai_tools_agent_stream( agent_executor = AgentExecutor(agent=agent, 
tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.stream( {"input": "How many letters in the word eudca"}, @@ -1405,111 +2054,236 @@ def test_langchain_openai_tools_agent_stream( ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - chat_spans = list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat") - tool_exec_span = next( - x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" - ) + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" - assert len(chat_spans) == 2 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + chat_spans = list( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat" + ) + tool_exec_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" + ) - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert len(chat_spans) == 2 - 
assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[0]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert chat_spans[1]["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert tool_exec_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - # We can't guarantee anything about the "shape" of the langchain execution graph - assert ( - len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) > 0 - ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) - # Token usage is only available in newer versions of langchain (v0.2+) - # where usage_metadata is supported on AIMessageChunk - if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: - assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 - assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 - assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 + # We can't guarantee anything about the "shape" of the langchain execution graph + assert ( + len(list(x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.chat")) + > 0 + ) - if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: - assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 - assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 - assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["attributes"]: + assert chat_spans[0]["attributes"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["attributes"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["attributes"]["gen_ai.usage.total_tokens"] == 192 - assert "5" in 
chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] - assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + if "gen_ai.usage.input_tokens" in chat_spans[1]["attributes"]: + assert chat_spans[1]["attributes"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["attributes"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["attributes"]["gen_ai.usage.total_tokens"] == 117 - param_id = request.node.callspec.id - if "string" in param_id: - assert [ - { - "type": "text", - "content": "You are very powerful assistant, but don't know current events", - } - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] - ) + assert "5" in chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + + assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "attributes", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if 
isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["function_call"] + assert chat_spans[1]["attributes"][ + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS + ] == ["stop"] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS + ] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data else: - assert [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] == json.loads( - chat_spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + {"run_name": "my-snazzy-pipeline"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + tool_exec_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) - assert "5" in chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert len(chat_spans) == 2 + + assert invoke_agent_span["origin"] == "auto.ai.langchain" + assert 
chat_spans[0]["origin"] == "auto.ai.langchain" + assert chat_spans[1]["origin"] == "auto.ai.langchain" + assert tool_exec_span["origin"] == "auto.ai.langchain" + + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0 + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192 - # Verify tool calls are recorded when PII is enabled - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("attributes", {}), ( - "Tool calls should be recorded when send_default_pii=True and include_prompts=True" - ) - tool_calls_data = chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - assert isinstance(tool_calls_data, (list, str)) # Could be serialized - if isinstance(tool_calls_data, str): - assert "get_word_length" in tool_calls_data - elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: - # Check if tool calls contain expected function name - tool_call_str = str(tool_calls_data) - assert "get_word_length" in tool_call_str + if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]: + assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89 + assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28 + assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117 - # Verify finish_reasons is always an array of strings - assert chat_spans[0]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "function_call" - ] - assert chat_spans[1]["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ - "stop" - ] + 
assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT] + assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT]) - # Verify that available tools are always recorded regardless of PII settings - for chat_span in chat_spans: - tools_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data is not None, ( - "Available tools should always be recorded regardless of PII settings" - ) - assert "get_word_length" in tools_data + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # Verify tool calls are recorded when PII is enabled + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get( + "data", {} + ), ( + "Tool calls should be recorded when send_default_pii=True and include_prompts=True" + ) + tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, (list, str)) # Could be serialized + if isinstance(tool_calls_data, str): + assert "get_word_length" in tool_calls_data + elif isinstance(tool_calls_data, list) and len(tool_calls_data) > 0: + # Check if tool calls contain expected function name + tool_call_str = str(tool_calls_data) + assert "get_word_length" in tool_call_str + + # Verify finish_reasons is always an array of strings + assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "function_call" + ] + assert 
chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [ + "stop" + ] + + # Verify that available tools are always recorded regardless of PII settings + for chat_span in chat_spans: + tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + assert tools_data is not None, ( + "Available tools should always be recorded regardless of PII settings" + ) + assert "get_word_length" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_openai_tools_agent_stream_with_config( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, streaming_chat_completions_model_responses, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1519,8 +2293,8 @@ def test_langchain_openai_tools_agent_stream_with_config( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") prompt = ChatPromptTemplate.from_messages( [ @@ -1560,30 +2334,65 @@ def test_langchain_openai_tools_agent_stream_with_config( agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with patch.object( - llm.client._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - with start_transaction(): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): list( agent_executor.stream( {"input": "How many letters in the word eudca"}, ) ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = next(item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - x for x in spans if 
x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + ) + assert ( + invoke_agent_span["attributes"]["gen_ai.function_id"] + == "my-snazzy-pipeline" + ) + else: + events = capture_events() + + with patch.object( + llm.client._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _, start_transaction(): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"}, + ) + ) + + tx = events[0] + + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + invoke_agent_span = next( + x for x in tx["spans"] if x["op"] == "gen_ai.invoke_agent" + ) + assert invoke_agent_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" -def test_langchain_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): global llm_type llm_type = "acme-llm" @@ -1591,8 +2400,8 @@ def test_langchain_error(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") prompt = ChatPromptTemplate.from_messages( [ @@ -1615,57 +2424,114 @@ def test_langchain_error(sentry_init, capture_items): agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) - with start_transaction(), pytest.raises(ValueError): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + if stream_gen_ai_spans: + items = 
capture_items("event") + + with start_transaction(), pytest.raises(ValueError): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) + + (error,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with start_transaction(), pytest.raises(ValueError): + list(agent_executor.stream({"input": "How many letters in the word eudca"})) - (error,) = (item.payload for item in items if item.type == "event") + error = events[0] assert error["level"] == "error" -def test_span_status_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_status_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): global llm_type llm_type = "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="test"): + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are very powerful assistant, but don't know current events", + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) - with start_transaction(name="test"): - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - "You are very powerful assistant, but don't know current events", - ), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - global stream_result_mock - stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) - llm = MockOpenAI( - model_name="gpt-3.5-turbo", - temperature=0, - openai_api_key="badkey", - ) - agent = 
create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor( + agent=agent, tools=[get_word_length], verbose=True + ) - agent_executor = AgentExecutor( - agent=agent, tools=[get_word_length], verbose=True - ) + with pytest.raises(ValueError): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"} + ) + ) - with pytest.raises(ValueError): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with start_transaction(name="test"): + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are very powerful assistant, but don't know current events", + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + + agent_executor = AgentExecutor( + agent=agent, tools=[get_word_length], verbose=True + ) + + with pytest.raises(ValueError): + list( + agent_executor.stream( + {"input": "How many letters in the word eudca"} + ) + ) - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" - (transaction,) = (item.payload for item in items if item.type == "transaction") assert 
transaction["contexts"]["trace"]["status"] == "internal_error" @@ -1880,7 +2746,13 @@ def test_langchain_callback_list_existing_callback(sentry_init): assert handler is sentry_callback -def test_langchain_message_role_mapping(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_message_role_mapping( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that message roles are properly normalized in langchain integration.""" global llm_type llm_type = "openai-chat" @@ -1889,8 +2761,8 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") prompt = ChatPromptTemplate.from_messages( [ @@ -1923,53 +2795,112 @@ def test_langchain_message_role_mapping(sentry_init, capture_items): # Test input that should trigger message role normalization test_input = "Hello, how are you?" 
- with start_transaction(): - list(agent_executor.stream({"input": test_input})) + message_data_found = False + if stream_gen_ai_spans: + items = capture_items("span") - spans = [item.payload for item in items if item.type == "span"] - # Find spans with gen_ai operation that should have message data - gen_ai_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op", "").startswith("gen_ai") - ] + with start_transaction(): + list(agent_executor.stream({"input": test_input})) + + spans = [item.payload for item in items if item.type == "span"] + # Find spans with gen_ai operation that should have message data + gen_ai_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op", "").startswith("gen_ai") + ] - # Check if any span has message data with normalized roles - message_data_found = False - for span in gen_ai_spans: - span_data = span.get("attributes", {}) - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: - message_data_found = True - messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] - - # Parse the message data (might be JSON string) - if isinstance(messages_data, str): - try: - messages = json.loads(messages_data) - except json.JSONDecodeError: - # If not valid JSON, skip this assertion - continue - else: - messages = messages_data + # Check if any span has message data with normalized roles + for span in gen_ai_spans: + span_data = span.get("attributes", {}) + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: + message_data_found = True + messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] + + # Parse the message data (might be JSON string) + if isinstance(messages_data, str): + try: + messages = json.loads(messages_data) + except json.JSONDecodeError: + # If not valid JSON, skip this assertion + continue + else: + messages = messages_data + + # Verify that the input message is present and contains the test input + assert isinstance(messages, list) + assert len(messages) > 0 + + # The test input should 
be in one of the messages + input_found = False + for msg in messages: + if isinstance(msg, dict) and test_input in str( + msg.get("content", "") + ): + input_found = True + break + elif isinstance(msg, str) and test_input in msg: + input_found = True + break + + assert input_found, ( + f"Test input '{test_input}' not found in messages: {messages}" + ) + break + else: + events = capture_events() - # Verify that the input message is present and contains the test input - assert isinstance(messages, list) - assert len(messages) > 0 + with start_transaction(): + list(agent_executor.stream({"input": test_input})) - # The test input should be in one of the messages - input_found = False - for msg in messages: - if isinstance(msg, dict) and test_input in str(msg.get("content", "")): - input_found = True - break - elif isinstance(msg, str) and test_input in msg: - input_found = True - break + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert input_found, ( - f"Test input '{test_input}' not found in messages: {messages}" - ) - break + # Find spans with gen_ai operation that should have message data + gen_ai_spans = [ + span + for span in tx.get("spans", []) + if span.get("op", "").startswith("gen_ai") + ] + + # Check if any span has message data with normalized roles + for span in gen_ai_spans: + span_data = span.get("data", {}) + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: + message_data_found = True + messages_data = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] + + # Parse the message data (might be JSON string) + if isinstance(messages_data, str): + try: + messages = json.loads(messages_data) + except json.JSONDecodeError: + # If not valid JSON, skip this assertion + continue + else: + messages = messages_data + + # Verify that the input message is present and contains the test input + assert isinstance(messages, list) + assert len(messages) > 0 + + # The test input should be in one of the messages + input_found = False + for msg 
in messages: + if isinstance(msg, dict) and test_input in str( + msg.get("content", "") + ): + input_found = True + break + elif isinstance(msg, str) and test_input in msg: + input_found = True + break + + assert input_found, ( + f"Test input '{test_input}' not found in messages: {messages}" + ) + break # The message role mapping functionality is primarily tested through the normalization # that happens in the integration code. The fact that we can capture and process @@ -2018,7 +2949,13 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -def test_langchain_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -2026,8 +2963,8 @@ def test_langchain_message_truncation(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2045,48 +2982,101 @@ def test_langchain_message_truncation(sentry_init, capture_items): "small message 5", ] - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } 
- }, - ) - callback.on_llm_end(response=response, run_id=run_id) + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + callback.on_llm_end(response=response, run_id=run_id) - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + tx = next(item.payload for item in items if item.type == "transaction") + assert tx["type"] == "transaction" + + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] + messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + events = capture_events() + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) + + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + 
) + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2096,6 +3086,7 @@ def test_langchain_message_truncation(sentry_init, capture_items): assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2106,7 +3097,12 @@ def test_langchain_message_truncation(sentry_init, capture_items): ], ) def test_langchain_embeddings_sync( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that sync embedding methods (embed_documents, embed_query) are properly traced.""" try: @@ -2118,64 +3114,129 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if 
stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ) as mock_embed_documents: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ) as mock_embed_documents: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + with start_transaction(name="test_embeddings"): + # Test embed_documents + result = embeddings.embed_documents(["Hello world", "Test document"]) - with start_transaction(name="test_embeddings"): - # Test embed_documents - result = embeddings.embed_documents(["Hello world", "Test document"]) + assert len(result) == 2 + mock_embed_documents.assert_called_once() - assert len(result) == 2 - mock_embed_documents.assert_called_once() - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - embeddings_span = embeddings_spans[0] - assert embeddings_span["name"] == "embeddings text-embedding-ada-002" - assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert ( - 
embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + assert len(embeddings_spans) == 1 - # Check if input is captured based on PII settings - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Could be serialized as string - if isinstance(input_data, str): - assert "Hello world" in input_data - assert "Test document" in input_data + embeddings_span = embeddings_spans[0] + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + # Could be serialized as string + if isinstance(input_data, str): + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert "Hello world" in input_data + assert "Test document" in input_data else: - assert "Hello world" in input_data - assert "Test document" in input_data + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( - "attributes", {} + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ) as mock_embed_documents: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our 
mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings"): + # Test embed_documents + result = embeddings.embed_documents(["Hello world", "Test document"]) + + assert len(result) == 2 + mock_embed_documents.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings text-embedding-ada-002" + assert embeddings_span["origin"] == "auto.ai.langchain" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" ) + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "data", {} + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2184,7 +3245,12 @@ def test_langchain_embeddings_sync( ], ) def test_langchain_embeddings_embed_query( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that embed_query method is properly traced.""" try: @@ -2196,59 +3262,121 @@ def 
test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.1, 0.2, 0.3], + ) as mock_embed_query: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "embed_query", - wraps=lambda self, text: [0.1, 0.2, 0.3], - ) as mock_embed_query: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + with start_transaction(name="test_embeddings_query"): + result = embeddings.embed_query("What is the capital of France?") - with start_transaction(name="test_embeddings_query"): - result = embeddings.embed_query("What is the capital of France?") + assert len(result) == 3 + mock_embed_query.assert_called_once() - assert len(result) == 3 - mock_embed_query.assert_called_once() + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(embeddings_spans) == 1 - embeddings_span = embeddings_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == 
"embeddings" - assert ( - embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + embeddings_span = embeddings_spans[0] - # Check if input is captured based on PII settings - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Could be serialized as string - if isinstance(input_data, str): - assert "What is the capital of France?" in input_data + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + # Could be serialized as string + if isinstance(input_data, str): + assert "What is the capital of France?" in input_data + else: + assert "What is the capital of France?" in input_data else: - assert "What is the capital of France?" 
in input_data + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( - "attributes", {} + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.1, 0.2, 0.3], + ) as mock_embed_query: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_query"): + result = embeddings.embed_query("What is the capital of France?") + + assert len(result) == 3 + mock_embed_query.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" ) + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "What is the capital of France?" in input_data + else: + assert "What is the capital of France?" 
in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "data", {} + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -2258,7 +3386,12 @@ def test_langchain_embeddings_embed_query( ) @pytest.mark.asyncio async def test_langchain_embeddings_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test that async embedding methods (aembed_documents, aembed_query) are properly traced.""" try: @@ -2270,68 +3403,147 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") async def mock_aembed_documents(self, texts): return [[0.1, 0.2, 0.3] for _ in texts] - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "aembed_documents", - wraps=mock_aembed_documents, - ) as mock_aembed: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_documents", + wraps=mock_aembed_documents, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - with start_transaction(name="test_async_embeddings"): - result = await embeddings.aembed_documents( - ["Async hello", "Async test document"] - ) + with start_transaction(name="test_async_embeddings"): + result = await 
embeddings.aembed_documents( + ["Async hello", "Async test document"] + ) - assert len(result) == 2 - mock_aembed.assert_called_once() - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(result) == 2 + mock_aembed.assert_called_once() - embeddings_span = embeddings_spans[0] - assert embeddings_span["name"] == "embeddings text-embedding-ada-002" - assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert ( - embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - # Check if input is captured based on PII settings - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Could be serialized as string - if isinstance(input_data, str): - assert "Async hello" in input_data or "Async test document" in input_data + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["name"] == "embeddings text-embedding-ada-002" + assert embeddings_span["attributes"]["sentry.origin"] == "auto.ai.langchain" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" + ) + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in 
embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + # Could be serialized as string + if isinstance(input_data, str): + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) + else: + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) else: - assert "Async hello" in input_data or "Async test document" in input_data + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "attributes", {} + ) + else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( - "attributes", {} + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_documents", + wraps=mock_aembed_documents, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_async_embeddings"): + result = await embeddings.aembed_documents( + ["Async hello", "Async test document"] + ) + + assert len(result) == 2 + mock_aembed.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings text-embedding-ada-002" + assert embeddings_span["origin"] == "auto.ai.langchain" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" ) + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in 
embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) + else: + assert ( + "Async hello" in input_data or "Async test document" in input_data + ) + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get( + "data", {} + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): +async def test_langchain_embeddings_aembed_query( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that aembed_query method is properly traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -2342,50 +3554,100 @@ async def test_langchain_embeddings_aembed_query(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") async def mock_aembed_query(self, text): return [0.1, 0.2, 0.3] - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "aembed_query", - wraps=mock_aembed_query, - ) as mock_aembed: - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_query", + wraps=mock_aembed_query, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_async_embeddings_query"): + result = await embeddings.aembed_query("Async query test") + + assert len(result) == 3 + 
mock_aembed.assert_called_once() + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["attributes"]["gen_ai.request.model"] + == "text-embedding-ada-002" ) - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + # Check if input is captured + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] + input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + else: + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_query", + wraps=mock_aembed_query, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - with start_transaction(name="test_async_embeddings_query"): - result = await embeddings.aembed_query("Async query test") + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - assert len(result) == 3 - mock_aembed.assert_called_once() + with start_transaction(name="test_async_embeddings_query"): + result = await embeddings.aembed_query("Async query test") - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(result) == 3 + mock_aembed.assert_called_once() - embeddings_span = embeddings_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert ( - embeddings_span["attributes"]["gen_ai.request.model"] - == "text-embedding-ada-002" - ) + # Check captured 
events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert ( + embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + ) + + # Check if input is captured + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Check if input is captured - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["attributes"] - input_data = embeddings_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] # Could be serialized as string if isinstance(input_data, str): assert "Async query test" in input_data @@ -2393,7 +3655,13 @@ async def mock_aembed_query(self, text): assert "Async query test" in input_data -def test_langchain_embeddings_no_model_name(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_no_model_name( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test embeddings when model name is not available.""" try: from langchain_openai import OpenAIEmbeddings @@ -2403,81 +3671,169 @@ def test_langchain_embeddings_no_model_name(sentry_init, capture_items): sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API call and remove model attribute + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = 
OpenAIEmbeddings(openai_api_key="test-key") + # Remove model attribute to test fallback + delattr(embeddings, "model") + if hasattr(embeddings, "model_name"): + delattr(embeddings, "model_name") - # Mock the actual API call and remove model attribute - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ): - embeddings = OpenAIEmbeddings(openai_api_key="test-key") - # Remove model attribute to test fallback - delattr(embeddings, "model") - if hasattr(embeddings, "model_name"): - delattr(embeddings, "model_name") + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() - # Force setup to re-run to ensure our mock is wrapped - LangchainIntegration.setup_once() + with start_transaction(name="test_embeddings_no_model"): + embeddings.embed_documents(["Test"]) - with start_transaction(name="test_embeddings_no_model"): - embeddings.embed_documents(["Test"]) + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings span + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings span - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 1 + assert len(embeddings_spans) == 1 - embeddings_span = embeddings_spans[0] - assert embeddings_span["name"] == "embeddings" - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" - # Model name should not be set if not available - assert ( - "gen_ai.request.model" not in embeddings_span["attributes"] - or embeddings_span["attributes"]["gen_ai.request.model"] is None - ) + embeddings_span = embeddings_spans[0] + assert embeddings_span["name"] == "embeddings" + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + 
# Model name should not be set if not available + assert ( + "gen_ai.request.model" not in embeddings_span["attributes"] + or embeddings_span["attributes"]["gen_ai.request.model"] is None + ) + else: + events = capture_events() + + # Mock the actual API call and remove model attribute + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings(openai_api_key="test-key") + # Remove model attribute to test fallback + delattr(embeddings, "model") + if hasattr(embeddings, "model_name"): + delattr(embeddings, "model_name") + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_no_model"): + embeddings.embed_documents(["Test"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 1 + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + # Model name should not be set if not available + assert ( + "gen_ai.request.model" not in embeddings_span["data"] + or embeddings_span["data"]["gen_ai.request.model"] is None + ) -def test_langchain_embeddings_integration_disabled(sentry_init, capture_items): + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_integration_disabled( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that embeddings are not traced when integration is disabled.""" try: from langchain_openai import OpenAIEmbeddings except ImportError: pytest.skip("langchain_openai not installed") + sentry_init( + traces_sample_rate=1.0, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + # Initialize without LangchainIntegration - sentry_init(traces_sample_rate=1.0, _experiments={"gen_ai_as_v2_spans": True}) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - return_value=[[0.1, 0.2, 0.3]], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + return_value=[[0.1, 0.2, 0.3]], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - with start_transaction(name="test_embeddings_disabled"): - embeddings.embed_documents(["Test"]) + with start_transaction(name="test_embeddings_disabled"): + embeddings.embed_documents(["Test"]) - # Check that no embeddings spans were created - spans = [item.payload for item in items if item.type == "span"] - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - # Should be empty since integration is disabled - assert len(embeddings_spans) == 0 + # Check that no embeddings spans were created + spans = [item.payload for item in items if item.type == "span"] + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 + else: + events = capture_events() + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + return_value=[[0.1, 0.2, 0.3]], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + with start_transaction(name="test_embeddings_disabled"): + embeddings.embed_documents(["Test"]) -def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): + # Check that no embeddings spans were created + if events: + tx 
= events[0] + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_multiple_providers( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that embeddings work with different providers.""" try: from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings @@ -2488,53 +3844,107 @@ def test_langchain_embeddings_multiple_providers(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock both providers + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + AzureOpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], + ): + openai_embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + azure_embeddings = AzureOpenAIEmbeddings( + model="text-embedding-ada-002", + azure_endpoint="https://test.openai.azure.com/", + openai_api_key="test-key", + ) - # Mock both providers - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ), mock.patch.object( - AzureOpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], - ): - openai_embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) - azure_embeddings = AzureOpenAIEmbeddings( - model="text-embedding-ada-002", - azure_endpoint="https://test.openai.azure.com/", - 
openai_api_key="test-key", - ) + # Force setup to re-run + LangchainIntegration.setup_once() - # Force setup to re-run - LangchainIntegration.setup_once() + with start_transaction(name="test_multiple_providers"): + openai_embeddings.embed_documents(["OpenAI test"]) + azure_embeddings.embed_documents(["Azure test"]) + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings spans + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - with start_transaction(name="test_multiple_providers"): - openai_embeddings.embed_documents(["OpenAI test"]) - azure_embeddings.embed_documents(["Azure test"]) + # Should have 2 spans, one for each provider + assert len(embeddings_spans) == 2 - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings spans - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - # Should have 2 spans, one for each provider - assert len(embeddings_spans) == 2 + # Verify both spans have proper data + for span in embeddings_spans: + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + ) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + else: + events = capture_events() + + # Mock both providers + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + AzureOpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], + ): + openai_embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + azure_embeddings = AzureOpenAIEmbeddings( + model="text-embedding-ada-002", + azure_endpoint="https://test.openai.azure.com/", + openai_api_key="test-key", + ) + + # Force setup to re-run + 
LangchainIntegration.setup_once() - # Verify both spans have proper data - for span in embeddings_spans: - assert span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + with start_transaction(name="test_multiple_providers"): + openai_embeddings.embed_documents(["OpenAI test"]) + azure_embeddings.embed_documents(["Azure test"]) + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" -def test_langchain_embeddings_error_handling(sentry_init, capture_items): + # Find embeddings spans + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + # Should have 2 spans, one for each provider + assert len(embeddings_spans) == 2 + + # Verify both spans have proper data + for span in embeddings_spans: + assert span["data"]["gen_ai.operation.name"] == "embeddings" + assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + + +def test_langchain_embeddings_error_handling(sentry_init, capture_events): """Test that errors in embeddings are properly captured.""" try: from langchain_openai import OpenAIEmbeddings @@ -2546,7 +3956,7 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): traces_sample_rate=1.0, send_default_pii=True, ) - items = capture_items("event") + events = capture_events() # Mock the API call to raise an error with mock.patch.object( @@ -2561,20 +3971,24 @@ def test_langchain_embeddings_error_handling(sentry_init, capture_items): # Force setup to re-run LangchainIntegration.setup_once() - with start_transaction(name="test_embeddings_error"): - with pytest.raises(ValueError): - embeddings.embed_documents(["Test"]) + with start_transaction(name="test_embeddings_error"), pytest.raises(ValueError): + 
embeddings.embed_documents(["Test"]) - [ - item.payload - for item in items - if item.type == "event" and item.payload.get("level") == "error" - ] + # The error should be captured + assert len(events) >= 1 + # We should have both the transaction and potentially an error event + [e for e in events if e.get("level") == "error"] # Note: errors might not be auto-captured depending on SDK settings, # but the span should still be created -def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_multiple_calls( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that multiple embeddings calls within a transaction are all traced.""" try: from langchain_openai import OpenAIEmbeddings @@ -2585,59 +3999,122 @@ def test_langchain_embeddings_multiple_calls(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API calls - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ), mock.patch.object( - OpenAIEmbeddings, - "embed_query", - wraps=lambda self, text: [0.4, 0.5, 0.6], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run + LangchainIntegration.setup_once() + + with 
start_transaction(name="test_multiple_embeddings"): + # Call embed_documents + embeddings.embed_documents(["First batch", "Second batch"]) + # Call embed_query + embeddings.embed_query("Single query") + # Call embed_documents again + embeddings.embed_documents(["Third batch"]) + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - # Force setup to re-run - LangchainIntegration.setup_once() + assert len(embeddings_spans) == 3 - with start_transaction(name="test_multiple_embeddings"): - # Call embed_documents - embeddings.embed_documents(["First batch", "Second batch"]) - # Call embed_query - embeddings.embed_query("Single query") - # Call embed_documents again - embeddings.embed_documents(["Third batch"]) - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] - assert len(embeddings_spans) == 3 - - # Verify all spans have proper data - for span in embeddings_spans: - assert span["attributes"]["gen_ai.operation.name"] == "embeddings" - assert span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] - - # Verify the input data is different for each span - input_data_list = [ - span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - for span in embeddings_spans - ] + # Verify all spans have proper data + for span in embeddings_spans: + assert span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert ( + span["attributes"]["gen_ai.request.model"] == "text-embedding-ada-002" + ) + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + + # Verify the input data is 
different for each span + input_data_list = [ + span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + for span in embeddings_spans + ] + else: + events = capture_events() + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_multiple_embeddings"): + # Call embed_documents + embeddings.embed_documents(["First batch", "Second batch"]) + # Call embed_query + embeddings.embed_query("Single query") + # Call embed_documents again + embeddings.embed_documents(["Third batch"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 3 + + # Verify all spans have proper data + for span in embeddings_spans: + assert span["data"]["gen_ai.operation.name"] == "embeddings" + assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + + # Verify the input data is different for each span + input_data_list = [ + span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] for span in embeddings_spans + ] # They should all be different (different inputs) assert len(set(str(data) for data in input_data_list)) == 3 -def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_span_hierarchy( + sentry_init, + capture_events, + capture_items, 
+ stream_gen_ai_spans, +): """Test that embeddings spans are properly nested within parent spans.""" try: from langchain_openai import OpenAIEmbeddings @@ -2648,49 +4125,106 @@ def test_langchain_embeddings_span_hierarchy(sentry_init, capture_items): integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Mock the actual API call - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + # Force setup to re-run + LangchainIntegration.setup_once() - # Force setup to re-run - LangchainIntegration.setup_once() + with start_transaction(name="test_span_hierarchy"), sentry_sdk.start_span( + op="custom", name="custom operation" + ): + embeddings.embed_documents(["Test within custom span"]) + + spans = [item.payload for item in items if item.type == "span"] + # Find all spans + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] + + tx = next(item.payload for item in items if item.type == "transaction") + + custom_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "custom" + ] + + assert len(embeddings_spans) == 1 + assert len(custom_spans) == 1 + + # Both spans should exist + embeddings_span = embeddings_spans[0] + custom_span = custom_spans[0] + + assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + 
else: + events = capture_events() - with start_transaction(name="test_span_hierarchy"): - with sentry_sdk.start_span(op="custom", name="custom operation"): + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_span_hierarchy"), sentry_sdk.start_span( + op="custom", name="custom operation" + ): embeddings.embed_documents(["Test within custom span"]) - spans = [item.payload for item in items if item.type == "span"] - # Find all spans - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - ] + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find all spans + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] - tx = next(item.payload for item in items if item.type == "transaction") - custom_spans = [span for span in tx.get("spans", []) if span.get("op") == "custom"] + custom_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "custom" + ] - assert len(embeddings_spans) == 1 - assert len(custom_spans) == 1 + assert len(embeddings_spans) == 1 + assert len(custom_spans) == 1 - # Both spans should exist - embeddings_span = embeddings_spans[0] - custom_span = custom_spans[0] + # Both spans should exist + embeddings_span = embeddings_spans[0] + custom_span = custom_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" assert custom_span["description"] == "custom operation" -def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_items): 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langchain_embeddings_with_list_and_string_inputs( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that embeddings correctly handle both list and string inputs.""" try: from langchain_openai import OpenAIEmbeddings @@ -2701,53 +4235,111 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") - - # Mock the actual API calls - with mock.patch.object( - OpenAIEmbeddings, - "embed_documents", - wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], - ), mock.patch.object( - OpenAIEmbeddings, - "embed_query", - wraps=lambda self, text: [0.4, 0.5, 0.6], - ): - embeddings = OpenAIEmbeddings( - model="text-embedding-ada-002", openai_api_key="test-key" - ) + if stream_gen_ai_spans: + items = capture_items("span") + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) - # Force setup to re-run - LangchainIntegration.setup_once() + # Force setup to re-run + LangchainIntegration.setup_once() - with start_transaction(name="test_input_types"): - # embed_documents takes a list - embeddings.embed_documents(["List item 1", "List item 2", "List item 3"]) - # embed_query takes a string - embeddings.embed_query("Single string query") - - spans = [item.payload for item in items if item.type == "span"] - # Find embeddings spans - embeddings_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.embeddings" - 
] - assert len(embeddings_spans) == 2 + with start_transaction(name="test_input_types"): + # embed_documents takes a list + embeddings.embed_documents( + ["List item 1", "List item 2", "List item 3"] + ) + # embed_query takes a string + embeddings.embed_query("Single string query") + + spans = [item.payload for item in items if item.type == "span"] + # Find embeddings spans + embeddings_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.embeddings" + ] - # Both should have input data captured as lists - for span in embeddings_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] - input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - # Input should be normalized to list format - if isinstance(input_data, str): - # If serialized, should contain the input text - assert "List item" in input_data or "Single string query" in input_data, ( - f"Expected input text in serialized data: {input_data}" + assert len(embeddings_spans) == 2 + + # Both should have input data captured as lists + for span in embeddings_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["attributes"] + input_data = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Input should be normalized to list format + if isinstance(input_data, str): + # If serialized, should contain the input text + assert ( + "List item" in input_data or "Single string query" in input_data + ), f"Expected input text in serialized data: {input_data}" + else: + events = capture_events() + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" ) + # Force setup to re-run + LangchainIntegration.setup_once() + + with 
start_transaction(name="test_input_types"): + # embed_documents takes a list + embeddings.embed_documents( + ["List item 1", "List item 2", "List item 3"] + ) + # embed_query takes a string + embeddings.embed_query("Single string query") + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + + assert len(embeddings_spans) == 2 + + # Both should have input data captured as lists + for span in embeddings_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + input_data = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Input should be normalized to list format + if isinstance(input_data, str): + # If serialized, should contain the input text + assert ( + "List item" in input_data or "Single string query" in input_data + ), f"Expected input text in serialized data: {input_data}" + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "response_metadata_model,expected_model", [ @@ -2757,16 +4349,18 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_i ) def test_langchain_response_model_extraction( sentry_init, + capture_events, capture_items, response_metadata_model, expected_model, + stream_gen_ai_spans, ): sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2774,39 +4368,87 @@ def test_langchain_response_model_extraction( serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} prompts = ["Test prompt"] - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - invocation_params={"model": 
"gpt-3.5-turbo"}, - ) + if stream_gen_ai_spans: + items = capture_items("span") - response_metadata = {"model_name": response_metadata_model} - message = AIMessageChunk( - content="Test response", response_metadata=response_metadata - ) + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + response_metadata = {"model_name": response_metadata_model} + message = AIMessageChunk( + content="Test response", response_metadata=response_metadata + ) - generation = Mock(text="Test response", message=message) - response = Mock(generations=[[generation]]) - callback.on_llm_end(response=response, run_id=run_id) + generation = Mock(text="Test response", message=message) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 - llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + llm_span = llm_spans[0] - if expected_model is not None: - assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] - assert llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" + + if expected_model is not None: + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["attributes"] + assert ( + llm_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + ) + else: + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) else: - assert 
SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("attributes", {}) + events = capture_events() + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + response_metadata = {"model_name": response_metadata_model} + message = AIMessageChunk( + content="Test response", response_metadata=response_metadata + ) + + generation = Mock(text="Test response", message=message) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + + if expected_model is not None: + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["data"] + assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + else: + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) # Tests for multimodal content transformation functions @@ -3020,6 +4662,7 @@ def test_transform_google_file_data(self): } +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "ai_type,expected_system", [ @@ -3065,13 +4708,18 @@ def test_transform_google_file_data(self): ], ) def test_langchain_ai_system_detection( - sentry_init, capture_items, ai_type, expected_system + sentry_init, + capture_events, + capture_items, + ai_type, + expected_system, + stream_gen_ai_spans, ): sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -3079,32 +4727,67 @@ def test_langchain_ai_system_detection( 
serialized = {"_type": ai_type} if ai_type is not None else {} prompts = ["Test prompt"] - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - invocation_params={"_type": ai_type, "model": "test-model"}, - ) + if stream_gen_ai_spans: + items = capture_items("span") - generation = Mock(text="Test response", message=None) - response = Mock(generations=[[generation]]) - callback.on_llm_end(response=response, run_id=run_id) + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"_type": ai_type, "model": "test-model"}, + ) - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 + generation = Mock(text="Test response", message=None) + response = Mock(generations=[[generation]]) + callback.on_llm_end(response=response, run_id=run_id) + + spans = [item.payload for item in items if item.type == "span"] + llm_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == "gen_ai.text_completion" + ] - llm_span = llm_spans[0] + assert len(llm_spans) > 0 + llm_span = llm_spans[0] - if expected_system is not None: - assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system + if expected_system is not None: + assert llm_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == expected_system + else: + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) else: - assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("attributes", {}) + events = capture_events() + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"_type": ai_type, "model": "test-model"}, + ) + + generation = Mock(text="Test response", message=None) + response = Mock(generations=[[generation]]) + 
callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + + assert len(llm_spans) > 0 + llm_span = llm_spans[0] + + if expected_system is not None: + assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system + else: + assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {}) class TestTransformLangchainMessageContent: diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index e1a3baa0a8..0052fefa29 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -137,6 +137,7 @@ def test_langgraph_integration_init(): assert integration.origin == "auto.ai.langgraph" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -147,53 +148,103 @@ def test_langgraph_integration_init(): ], ) def test_state_graph_compile( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """Test StateGraph.compile() wrapper creates proper create_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + graph = MockStateGraph() def original_compile(self, *args, **kwargs): return MockCompiledGraph(self.name) - with patch("sentry_sdk.integrations.langgraph.StateGraph"): - with start_transaction(): - wrapped_compile = _wrap_state_graph_compile(original_compile) - compiled_graph = wrapped_compile( - graph, model="test-model", checkpointer=None - ) + if stream_gen_ai_spans: + items = 
capture_items("transaction", "span") - assert compiled_graph is not None - assert compiled_graph.name == "test_graph" + with patch("sentry_sdk.integrations.langgraph.StateGraph"): + with start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) - spans = [item.payload for item in items if item.type == "span"] - agent_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT - ] - assert len(agent_spans) == 1 + assert compiled_graph is not None + assert compiled_graph.name == "test_graph" + + spans = [item.payload for item in items if item.type == "span"] + agent_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT + ] + + assert len(agent_spans) == 1 + agent_span = agent_spans[0] + + assert agent_span["name"] == "create_agent test_graph" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + ) + assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + + tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - agent_span = agent_spans[0] - assert agent_span["name"] == "create_agent test_graph" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert agent_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" - assert agent_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - assert agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" - assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] + assert tools_data == ["search_tool", "calculator"] + assert len(tools_data) == 2 
+ assert "search_tool" in tools_data + assert "calculator" in tools_data + else: + events = capture_events() - tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data == ["search_tool", "calculator"] - assert len(tools_data) == 2 - assert "search_tool" in tools_data - assert "calculator" in tools_data + with patch("sentry_sdk.integrations.langgraph.StateGraph"): + with start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) + assert compiled_graph is not None + assert compiled_graph.name == "test_graph" + tx = events[0] + assert tx["type"] == "transaction" + + agent_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT + ] + + assert len(agent_spans) == 1 + agent_span = agent_spans[0] + + assert agent_span["description"] == "create_agent test_graph" + assert agent_span["origin"] == "auto.ai.langgraph" + assert agent_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "create_agent" + assert agent_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + assert agent_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "test-model" + assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["data"] + + tools_data = agent_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + + assert tools_data == ["search_tool", "calculator"] + assert len(tools_data) == 2 + assert "search_tool" in tools_data + assert "calculator" in tools_data + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -203,14 +254,21 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_items, send_default_pii, include_prompts): +def test_pregel_invoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test 
Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -241,61 +299,137 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent test_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + assert len(invoke_spans) == 1 - request_messages = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + invoke_span = invoke_spans[0] - if isinstance(request_messages, 
str): - import json + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + request_messages = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_MESSAGES + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" 
- tool_calls_data = json.loads(tool_calls_data) + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + tx = events[0] + assert tx["type"] == "transaction" + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert 
invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -305,14 +439,22 @@ def original_invoke(self, *args, **kwargs): (False, False), ], ) -def test_pregel_ainvoke(sentry_init, capture_items, send_default_pii, include_prompts): +def test_pregel_ainvoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test Pregel.ainvoke() async wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} pregel = MockPregelInstance("async_graph") @@ -342,87 +484,187 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent async_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" + assert len(invoke_spans) == 1 - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + invoke_span = invoke_spans[0] - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + assert invoke_span["name"] == "invoke_agent async_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert 
invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_weather_456" - assert tool_calls_data[0]["function"]["name"] == "get_weather" + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_weather_456" + assert tool_calls_data[0]["function"]["name"] == "get_weather" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + 
invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span["description"] == "invoke_agent async_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "async_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "async_graph" -def test_pregel_invoke_error(sentry_init, capture_items): + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_weather_456" + assert tool_calls_data[0]["function"]["name"] == "get_weather" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test error handling during graph execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + test_state = 
{"messages": [MockMessage("This will fail")]} pregel = MockPregelInstance("error_graph") def original_invoke(self, *args, **kwargs): raise Exception("Graph execution failed") - with start_transaction(), pytest.raises(Exception, match="Graph execution failed"): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + with start_transaction(), pytest.raises( + Exception, match="Graph execution failed" + ): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, test_state) + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "error" + else: + events = capture_events() + + with start_transaction(), pytest.raises( + Exception, match="Graph execution failed" + ): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, test_state) + + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 - invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "error" + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "internal_error" + assert invoke_span.get("tags", {}).get("status") == "internal_error" -def test_pregel_ainvoke_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test error handling during async graph 
execution.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + test_state = {"messages": [MockMessage("This will fail async")]} pregel = MockPregelInstance("async_error_graph") @@ -436,58 +678,105 @@ async def run_error_test(): wrapped_ainvoke = _wrap_pregel_ainvoke(original_ainvoke) await wrapped_ainvoke(pregel, test_state) - asyncio.run(run_error_test()) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + asyncio.run(run_error_test()) + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "error" + else: + events = capture_events() + + asyncio.run(run_error_test()) - invoke_span = invoke_spans[0] - assert invoke_span.get("status") == "error" + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert invoke_span.get("status") == "internal_error" + assert invoke_span.get("tags", {}).get("status") == "internal_error" -def test_span_origin(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that span origins are correctly set.""" sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") graph = MockStateGraph() def original_compile(self, *args, **kwargs): return MockCompiledGraph(self.name) - with start_transaction(): - from sentry_sdk.integrations.langgraph import _wrap_state_graph_compile + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(): + from sentry_sdk.integrations.langgraph import _wrap_state_graph_compile + + wrapped_compile = _wrap_state_graph_compile(original_compile) + wrapped_compile(graph) + + tx = next(item.payload for item in items if item.type == "transaction") + assert tx["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + else: + events = capture_events() + + with start_transaction(): + from sentry_sdk.integrations.langgraph import _wrap_state_graph_compile - wrapped_compile = _wrap_state_graph_compile(original_compile) - wrapped_compile(graph) + wrapped_compile = _wrap_state_graph_compile(original_compile) + wrapped_compile(graph) - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["contexts"]["trace"]["origin"] == "manual" + tx = events[0] + assert tx["contexts"]["trace"]["origin"] == "manual" - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - assert span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + for span in tx["spans"]: + assert span["origin"] == "auto.ai.langgraph" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize("graph_name", ["my_graph", None, ""]) def test_pregel_invoke_with_different_graph_names( - sentry_init, capture_items, graph_name + sentry_init, + capture_events, + capture_items, + graph_name, + stream_gen_ai_spans, ): """Test Pregel.invoke() with different graph name scenarios.""" sentry_init( 
integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() if not graph_name: @@ -497,31 +786,69 @@ def test_pregel_invoke_with_different_graph_names( def original_invoke(self, *args, **kwargs): return {"result": "test"} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - wrapped_invoke(pregel, {"messages": []}) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, {"messages": []}) + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] + assert len(invoke_spans) == 1 - if graph_name and graph_name.strip(): - assert invoke_span["name"] == "invoke_agent my_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + invoke_span = invoke_spans[0] + + if graph_name and graph_name.strip(): + assert invoke_span["name"] == "invoke_agent my_graph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + else: + assert invoke_span["name"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) else: - 
assert invoke_span["name"] == "invoke_agent" - assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("attributes", {}) - assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("attributes", {}) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + wrapped_invoke(pregel, {"messages": []}) + + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + if graph_name and graph_name.strip(): + assert invoke_span["description"] == "invoke_agent my_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == graph_name + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == graph_name + else: + assert invoke_span["description"] == "invoke_agent" + assert SPANDATA.GEN_AI_PIPELINE_NAME not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_AGENT_NAME not in invoke_span.get("data", {}) -def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_span_includes_usage_data( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. This verifies the new functionality added to track token usage in invoke_agent spans. 
@@ -529,8 +856,8 @@ def test_pregel_invoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -569,35 +896,75 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has usage data + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + + # The usage should match the mock_usage values (aggregated across all calls) + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with 
start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] - # Verify invoke_agent span has usage data - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + # Verify invoke_agent span has usage data + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] - # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + # The usage should match the mock_usage values (aggregated across all calls) + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 -def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_span_includes_usage_data( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. 
This verifies the new functionality added to track token usage in invoke_agent spans. @@ -605,8 +972,8 @@ def test_pregel_ainvoke_span_includes_usage_data(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -651,32 +1018,69 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] - # Verify invoke_agent span has usage data - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + # Verify invoke_agent span has usage data + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + + # The usage should match the mock_usage values (aggregated across all calls) + assert 
invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() - # The usage should match the mock_usage values (aggregated across all calls) - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + result = asyncio.run(run_test()) + assert result is not None + tx = events[0] + assert tx["type"] == "transaction" -def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has usage data + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + + # The usage should match the mock_usage values (aggregated across all calls) + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_multiple_llm_calls_aggregate_usage( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). 
@@ -684,8 +1088,8 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_i sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -735,29 +1139,63 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 - invoke_agent_span = invoke_spans[0] + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in 
tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span has aggregated usage from both API calls - # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_items): + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls (e.g., when tools are used and multiple API calls are made). 
@@ -765,8 +1203,8 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage(sentry_init, capture_ sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -822,26 +1260,57 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 - invoke_agent_span = invoke_spans[0] + result = asyncio.run(run_test()) + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + else: + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" - # Verify invoke_agent span has aggregated usage from both API calls - # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert 
invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 -def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_span_includes_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. 
@@ -849,8 +1318,8 @@ def test_pregel_invoke_span_includes_response_model(sentry_init, capture_items): sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -889,31 +1358,68 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span has response model - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result 
= wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_span_includes_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent spans include the response model. When an agent makes multiple LLM calls, it should report the last model used. @@ -921,8 +1427,8 @@ def test_pregel_ainvoke_span_includes_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -967,28 +1473,62 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if 
span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span has response model - assert invoke_agent_span["name"] == "invoke_agent test_graph" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["name"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + tx = events[0] + assert tx["type"] == "transaction" -def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items): + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span has response model + assert invoke_agent_span["description"] == "invoke_agent test_graph" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_invoke_span_uses_last_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. 
@@ -996,8 +1536,8 @@ def test_pregel_invoke_span_uses_last_response_model(sentry_init, capture_items) sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1049,30 +1589,66 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] 
== "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) -def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_pregel_ainvoke_span_uses_last_response_model( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that when an agent makes multiple LLM calls (e.g., with tools), the invoke_agent span reports the last response model used. @@ -1080,8 +1656,8 @@ def test_pregel_ainvoke_span_uses_last_response_model(sentry_init, capture_items sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") test_state = { "messages": [ @@ -1139,24 +1715,51 @@ async def run_test(): result = await wrapped_ainvoke(pregel, test_state) return result - result = asyncio.run(run_test()) - assert result is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + result = asyncio.run(run_test()) + assert result is not None - invoke_agent_span = invoke_spans[0] + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - # Verify invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - 
assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + result = asyncio.run(run_test()) + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_agent_span = invoke_spans[0] + + # Verify invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) def test_complex_message_parsing(): @@ -1206,14 +1809,20 @@ def test_complex_message_parsing(): assert result[2]["function_call"]["name"] == "search" -def test_extraction_functions_complex_scenario(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_extraction_functions_complex_scenario( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test extraction functions with complex scenarios including multiple messages and edge cases.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") pregel = MockPregelInstance("complex_graph") test_state = {"messages": [MockMessage("Complex request", name="user")]} @@ -1248,29 +1857,59 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = 
wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + assert result is not None - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == "Final response" + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - import json + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == "Final response" + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + import json + + tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == "Final response" + + assert 
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + import json + + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - tool_calls_data = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] if isinstance(tool_calls_data, str): tool_calls_data = json.loads(tool_calls_data) @@ -1281,14 +1920,20 @@ def original_invoke(self, *args, **kwargs): assert tool_calls_data[1]["function"]["name"] == "calculate" -def test_langgraph_message_role_mapping(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langgraph_message_role_mapping( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that Langgraph integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") # Mock a langgraph message with mixed roles class MockMessage: @@ -1309,51 +1954,83 @@ def __init__(self, content, message_type="human"): compiled_graph = MockCompiledGraph("test_graph") pregel = MockPregelInstance(compiled_graph) - with start_transaction(name="langgraph tx"): - # Use the wrapped invoke function directly - from sentry_sdk.integrations.langgraph import _wrap_pregel_invoke + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - wrapped_invoke = _wrap_pregel_invoke( - lambda self, state_data: {"result": "success"} - ) - wrapped_invoke(pregel, state_data) + with start_transaction(name="langgraph tx"): + # Use the wrapped invoke function directly + from sentry_sdk.integrations.langgraph import _wrap_pregel_invoke - span = next(item.payload for item in items if item.type == "span") + wrapped_invoke = _wrap_pregel_invoke( + lambda self, state_data: {"result": "success"} + ) + wrapped_invoke(pregel, state_data) - # Verify that the span 
was created correctly - assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" + span = next(item.payload for item in items if item.type == "span") - # If messages were captured, verify role mapping - if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: - import json + # Verify that the span was created correctly + assert span["attributes"]["sentry.op"] == "gen_ai.invoke_agent" - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) + # If messages were captured, verify role mapping + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"]: + import json - # Find messages with specific content to verify role mapping - ai_message = next( - (msg for msg in stored_messages if msg.get("content") == "Hi there!"), None - ) - assistant_message = next( - (msg for msg in stored_messages if msg.get("content") == "How can I help?"), - None, - ) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with start_transaction(name="langgraph tx"): + # Use the wrapped invoke function directly + from sentry_sdk.integrations.langgraph import _wrap_pregel_invoke + + wrapped_invoke = _wrap_pregel_invoke( + lambda self, state_data: {"result": "success"} + ) + wrapped_invoke(pregel, state_data) + + (event,) = events + span = event["spans"][0] + + # Verify that the span was created correctly + assert span["op"] == "gen_ai.invoke_agent" + + # If messages were captured, verify role mapping + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]: + import json + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + # Find messages with specific content to verify role mapping + ai_message = next( + (msg for msg in stored_messages if msg.get("content") == "Hi there!"), None + ) + assistant_message = next( + (msg for msg in stored_messages if msg.get("content") == "How can I help?"), + None, + ) - if ai_message: - # "ai" should have been 
mapped to "assistant" - assert ai_message["role"] == "assistant" + if ai_message: + # "ai" should have been mapped to "assistant" + assert ai_message["role"] == "assistant" - if assistant_message: - # "assistant" should stay "assistant" - assert assistant_message["role"] == "assistant" + if assistant_message: + # "assistant" should stay "assistant" + assert assistant_message["role"] == "assistant" - # Verify no "ai" roles remain - roles = [msg["role"] for msg in stored_messages if "role" in msg] - assert "ai" not in roles + # Verify no "ai" roles remain + roles = [msg["role"] for msg in stored_messages if "role" in msg] + assert "ai" not in roles -def test_langgraph_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_langgraph_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -1361,8 +2038,8 @@ def test_langgraph_message_truncation(sentry_init, capture_items): integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1382,30 +2059,66 @@ def test_langgraph_message_truncation(sentry_init, capture_items): def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) > 0 + assert result is not None + + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) > 0 + + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + (tx,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) > 0 - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in 
invoke_span["attributes"] + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index eb00f7838a..ab60779ed6 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -131,6 +131,7 @@ def __init__( self.created = 1234567890 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -143,18 +144,20 @@ def __init__( def test_nonstreaming_chat_completion( reset_litellm_executor, sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -176,12 +179,14 @@ def test_nonstreaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with 
mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -190,38 +195,86 @@ def test_nonstreaming_chat_completion( litellm_utils.executor.shutdown(wait=True) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "litellm test" + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "litellm test" - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert 
SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + else: + events = capture_events() - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + litellm_utils.executor.shutdown(wait=True) + + assert len(events) == 1 + (event,) = events - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert event["type"] == "transaction" + assert event["transaction"] == "litellm test" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -234,18 +287,20 @@ def test_nonstreaming_chat_completion( ) async def test_async_nonstreaming_chat_completion( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -267,53 +322,91 @@ async def test_async_nonstreaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with mock.patch.object( client.completions._client._client, "send", return_value=model_response, - ): - with start_transaction(name="litellm test"): - await litellm.acompletion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + ), start_transaction(name="litellm test"): + await litellm.acompletion( + 
model="gpt-3.5-turbo", + messages=messages, + client=client, + ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["transaction"] == "litellm test" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["transaction"] == "litellm test" - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + else: + assert len(events) == 1 + (event,) = events 
- assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert event["type"] == "transaction" + assert event["transaction"] == "litellm test" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert 
span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -326,19 +419,21 @@ async def test_async_nonstreaming_chat_completion( def test_streaming_chat_completion( reset_litellm_executor, sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, streaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") messages = [{"role": "user", "content": "Hello!"}] @@ -352,12 +447,14 @@ def test_streaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): response = litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -369,20 +466,54 @@ def test_streaming_chat_completion( streaming_handler.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = 
chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + response = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + stream=True, + ) + for _ in response: + pass + + streaming_handler.executor.shutdown(wait=True) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["op"] == OP.GEN_AI_CHAT + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -395,6 +526,7 @@ def test_streaming_chat_completion( ) async def test_async_streaming_chat_completion( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, @@ -402,13 +534,14 @@ async def test_async_streaming_chat_completion( async_iterator, server_side_event_chunks, streaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -424,12 +557,14 @@ async def test_async_streaming_chat_completion( request_headers={"X-Stainless-Raw-Response": "true"}, ) - 
with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): response = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -442,26 +577,63 @@ async def test_async_streaming_chat_completion( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + stream=True, + ) + async for _ in response: + pass + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] - assert span["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["op"] == OP.GEN_AI_CHAT + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embeddings_create( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """ Test that litellm.embedding() calls are properly instrumented. @@ -473,8 +645,8 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -484,53 +656,103 @@ def test_embeddings_create( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = litellm.embedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - # Allow time for callbacks to complete (they may run in separate threads) - 
time.sleep(0.1) - # Response is processed by litellm, so just check it exists - assert response is not None + assert len(spans) == 1 + span = spans[0] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["name"] == "embeddings text-embedding-ada-002" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] - == "text-embedding-ada-002" - ) - # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Hello, world!"] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + assert json.loads(embeddings_input) == ["Hello, world!"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, 
so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["description"] == "embeddings text-embedding-ada-002" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == ["Hello, world!"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """ Test that litellm.embedding() calls are properly instrumented. 
@@ -542,8 +764,8 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -553,61 +775,112 @@ async def test_async_embeddings_create( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = await litellm.aembedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + assert len(spans) == 1 + span = spans[0] - # Response is processed by litellm, so just check it exists - assert response is not None + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["name"] == "embeddings text-embedding-ada-002" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == 
"text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["name"] == "embeddings text-embedding-ada-002" - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] - == "text-embedding-ada-002" - ) - # Check that embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Hello, world!"] + assert json.loads(embeddings_input) == ["Hello, world!"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["description"] == "embeddings text-embedding-ada-002" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert 
span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + ) + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == ["Hello, world!"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embeddings_create_with_list_input( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test embedding with list input.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -617,59 +890,108 @@ def test_embeddings_create_with_list_input( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = litellm.embedding( - model="text-embedding-ada-002", - input=["First text", "Second text", "Third text"], - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if 
x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - # Allow time for callbacks to complete (they may run in separate threads) - time.sleep(0.1) - # Response is processed by litellm, so just check it exists - assert response is not None + assert len(spans) == 1 + span = spans[0] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == [ - "First text", - "Second text", - "Third text", - ] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" 
+ spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_create_with_list_input( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test embedding with list input.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -679,59 +1001,109 @@ async def test_async_embeddings_create_with_list_input( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = await litellm.aembedding( - model="text-embedding-ada-002", - input=["First text", "Second text", "Third text"], - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await 
asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + assert len(spans) == 1 + span = spans[0] - # Response is processed by litellm, so just check it exists - assert response is not None + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] - - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - # Check that list of embeddings input is captured (it's JSON serialized) - embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == [ - "First text", - "Second text", - "Third text", - ] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + # 
Response is processed by litellm, so just check it exists + assert response is not None + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_embeddings_no_pii( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test that PII is not captured when disabled.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="test-key") @@ -741,53 +1113,95 @@ def test_embeddings_no_pii( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = litellm.embedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time 
for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - # Allow time for callbacks to complete (they may run in separate threads) - time.sleep(0.1) - # Response is processed by litellm, so just check it exists - assert response is not None + assert len(spans) == 1 + span = spans[0] - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = litellm.embedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + # Allow time for callbacks to complete (they may run in separate threads) + time.sleep(0.1) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == 
"auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_embeddings_no_pii( sentry_init, + capture_events, capture_items, get_model_response, openai_embedding_model_response, clear_litellm_cache, + stream_gen_ai_spans, ): """Test that PII is not captured when disabled.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="test-key") @@ -797,47 +1211,92 @@ async def test_async_embeddings_no_pii( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - response = await litellm.aembedding( - model="text-embedding-ada-002", - input="Hello, world!", - client=client, + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + spans = [item.payload for item in items if item.type == "span"] + spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - 
await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + assert len(spans) == 1 + span = spans[0] - # Response is processed by litellm, so just check it exists - assert response is not None + assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) - spans = [item.payload for item in items if item.type == "span"] - spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(spans) == 1 - span = spans[0] + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] - assert span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS - # Check that embeddings input is NOT captured when PII is disabled - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert span["op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_exception_handling( - reset_litellm_executor, sentry_init, capture_items, get_rate_limit_model_response + reset_litellm_executor, + sentry_init, + 
capture_events, + capture_items, + get_rate_limit_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -845,37 +1304,65 @@ def test_exception_handling( model_response = get_rate_limit_model_response() - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - with pytest.raises(litellm.RateLimitError): - litellm.completion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + if stream_gen_ai_spans: + items = capture_items("event") - # Find the error event - error_events = [ - item.payload - for item in items - if item.type == "event" and item.payload.get("level") == "error" - ] + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Find the error event + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Should have error event and transaction + assert len(events) >= 1 + # Find the error event + error_events = [e for e in events if e.get("level") == "error"] assert len(error_events) == 1 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def 
test_async_exception_handling( - sentry_init, capture_items, get_rate_limit_model_response + sentry_init, + capture_events, + capture_items, + get_rate_limit_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") messages = [{"role": "user", "content": "Hello!"}] @@ -883,40 +1370,66 @@ async def test_async_exception_handling( model_response = get_rate_limit_model_response() - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - with pytest.raises(litellm.RateLimitError): - await litellm.acompletion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + if stream_gen_ai_spans: + items = capture_items("event") - # Find the error event - error_events = [ - item.payload - for item in items - if item.type == "event" and item.payload.get("level") == "error" - ] + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Find the error event + error_events = [ + item.payload + for item in items + if item.type == "event" and item.payload.get("level") == "error" + ] + else: + events = capture_events() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"), pytest.raises( + litellm.RateLimitError + ): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Should have error event and transaction + assert len(events) >= 1 + # Find the error event + error_events = [e for e in events if e.get("level") == "error"] assert len(error_events) 
== 1 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -938,12 +1451,35 @@ def test_span_origin( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.litellm" + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -952,28 +1488,30 @@ def test_span_origin( litellm_utils.executor.shutdown(wait=True) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == 
"auto.ai.litellm" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.litellm" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_multiple_providers( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, nonstreaming_google_genai_model_response, + stream_gen_ai_spans, ): """Test that the integration correctly identifies different providers.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction") messages = [{"role": "user", "content": "Hello!"}] @@ -994,12 +1532,14 @@ def test_multiple_providers( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - openai_client.completions._client._client, - "send", - return_value=openai_model_response, - ): - with start_transaction(name="test gpt-3.5-turbo"): + if stream_gen_ai_spans: + items = capture_items("transaction") + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -1008,21 +1548,20 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) - _reset_litellm_executor() + _reset_litellm_executor() - anthropic_client = HTTPHandler() - anthropic_model_response = get_model_response( - nonstreaming_anthropic_model_response, - serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "true"}, - ) + anthropic_client = HTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) - with mock.patch.object( - anthropic_client, - "post", - 
return_value=anthropic_model_response, - ): - with start_transaction(name="test claude-3-opus-20240229"): + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): litellm.completion( model="claude-3-opus-20240229", messages=messages, @@ -1032,20 +1571,87 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) - _reset_litellm_executor() + _reset_litellm_executor() - gemini_client = HTTPHandler() - gemini_model_response = get_model_response( - nonstreaming_google_genai_model_response, - serialize_pydantic=True, - ) + gemini_client = HTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) - with mock.patch.object( - gemini_client, - "post", - return_value=gemini_model_response, - ): - with start_transaction(name="test gemini/gemini-pro"): + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): + litellm.completion( + model="gemini/gemini-pro", + messages=messages, + client=gemini_client, + api_key="test-key", + ) + + litellm_utils.executor.shutdown(wait=True) + + events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=openai_client, + ) + + litellm_utils.executor.shutdown(wait=True) + + _reset_litellm_executor() + + anthropic_client = HTTPHandler() + 
anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): + litellm.completion( + model="claude-3-opus-20240229", + messages=messages, + client=anthropic_client, + api_key="test-key", + ) + + litellm_utils.executor.shutdown(wait=True) + + _reset_litellm_executor() + + gemini_client = HTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) + + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): litellm.completion( model="gemini/gemini-pro", messages=messages, @@ -1055,30 +1661,32 @@ def test_multiple_providers( litellm_utils.executor.shutdown(wait=True) - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) == 3 + assert len(events) == 3 - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + for i in range(3): + span = events[i]["spans"][0] + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_multiple_providers( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, nonstreaming_anthropic_model_response, nonstreaming_google_genai_model_response, + stream_gen_ai_spans, ): """Test that the integration correctly identifies different providers.""" sentry_init( integrations=[LiteLLMIntegration()], 
traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1099,12 +1707,14 @@ async def test_async_multiple_providers( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - openai_client.completions._client._client, - "send", - return_value=openai_model_response, - ): - with start_transaction(name="test gpt-3.5-turbo"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -1114,21 +1724,20 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - _reset_litellm_executor() + _reset_litellm_executor() - anthropic_client = AsyncHTTPHandler() - anthropic_model_response = get_model_response( - nonstreaming_anthropic_model_response, - serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "True"}, - ) + anthropic_client = AsyncHTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) - with mock.patch.object( - anthropic_client, - "post", - return_value=anthropic_model_response, - ): - with start_transaction(name="test claude-3-opus-20240229"): + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): await litellm.acompletion( model="claude-3-opus-20240229", messages=messages, @@ -1139,20 +1748,90 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - _reset_litellm_executor() + _reset_litellm_executor() - 
gemini_client = AsyncHTTPHandler() - gemini_model_response = get_model_response( - nonstreaming_google_genai_model_response, - serialize_pydantic=True, - ) + gemini_client = AsyncHTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) - with mock.patch.object( - gemini_client, - "post", - return_value=gemini_model_response, - ): - with start_transaction(name="test gemini/gemini-pro"): + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): + await litellm.acompletion( + model="gemini/gemini-pro", + messages=messages, + client=gemini_client, + api_key="test-key", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + else: + events = capture_events() + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ), start_transaction(name="test gpt-3.5-turbo"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=openai_client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + _reset_litellm_executor() + + anthropic_client = AsyncHTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) + + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ), start_transaction(name="test claude-3-opus-20240229"): + await litellm.acompletion( + model="claude-3-opus-20240229", + messages=messages, + 
client=anthropic_client, + api_key="test-key", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + _reset_litellm_executor() + + gemini_client = AsyncHTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) + + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ), start_transaction(name="test gemini/gemini-pro"): await litellm.acompletion( model="gemini/gemini-pro", messages=messages, @@ -1163,28 +1842,30 @@ async def test_async_multiple_providers( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) == 3 + assert len(events) == 3 - spans = [item.payload for item in items if item.type == "span"] - for span in spans: - # The provider should be detected by litellm.get_llm_provider - assert SPANDATA.GEN_AI_SYSTEM in span["attributes"] + for i in range(3): + span = events[i]["spans"][0] + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_additional_parameters( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that additional parameters are captured.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1205,12 +1886,14 @@ def test_additional_parameters( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): 
+ if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -1224,36 +1907,74 @@ def test_additional_parameters( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + temperature=0.7, + max_tokens=100, + top_p=0.9, + frequency_penalty=0.5, + presence_penalty=0.5, + ) - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + litellm_utils.executor.shutdown(wait=True) + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_additional_parameters( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that additional parameters are captured.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1274,12 +1995,14 @@ async def test_async_additional_parameters( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -1294,35 +2017,74 @@ async def test_async_additional_parameters( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for 
item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + else: + events = capture_events() - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + temperature=0.7, + max_tokens=100, + top_p=0.9, + frequency_penalty=0.5, + presence_penalty=0.5, + ) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_no_integration( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = OpenAI(api_key="test-key") @@ -1343,12 +2105,37 @@ def test_no_integration( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + litellm_utils.executor.shutdown(wait=True) + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -1357,28 +2144,32 @@ def test_no_integration( 
litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) + (event,) = events + # Should still have the transaction, but no child spans since integration is off + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) assert len(chat_spans) == 0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_no_integration( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] client = AsyncOpenAI(api_key="test-key") @@ -1399,12 +2190,14 @@ async def test_async_no_integration( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -1414,23 +2207,54 @@ async def test_async_no_integration( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + # Should still have the transaction, but no child spans since integration is off + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) assert len(chat_spans) == 0 -def test_response_without_usage(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_response_without_usage( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test handling of responses without usage information.""" sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [{"role": "user", "content": "Hello!"}] @@ -1444,25 +2268,51 @@ def test_response_without_usage(sentry_init, capture_items): }, )() - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + 
_input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (span,) = (item.payload for item in items if item.type == "span") + + # Span should still be created even without usage info + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat gpt-3.5-turbo" + else: + events = capture_events() - (span,) = (item.payload for item in items if item.type == "span") + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - # Span should still be created even without usage info - assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - assert span["name"] == "chat gpt-3.5-turbo" + (event,) = events + (span,) = event["spans"] + + # Span should still be created even without usage info + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" def test_integration_setup(sentry_init): @@ -1478,14 +2328,20 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -def test_litellm_message_truncation(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_litellm_message_truncation( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1499,38 +2355,76 @@ def test_litellm_message_truncation(sentry_init, capture_items): ] mock_response = MockCompletionResponse() - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ] + + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + else: + events = capture_events() + + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + 
messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") + if stream_gen_ai_spans: + tx = next(item.payload for item in items if item.type == "transaction") + else: + pass assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -1539,19 +2433,22 @@ def test_litellm_message_truncation(sentry_init, capture_items): IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_binary_content_encoding_image_url( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1583,12 +2480,14 @@ def test_binary_content_encoding_image_url( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-4-vision-preview", messages=messages, @@ -1598,16 +2497,42 @@ def test_binary_content_encoding_image_url( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans 
- if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1628,19 +2553,22 @@ def test_binary_content_encoding_image_url( ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_image_url( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1672,12 +2600,14 @@ async def test_async_binary_content_encoding_image_url( 
request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-4-vision-preview", messages=messages, @@ -1688,16 +2618,43 @@ async def test_async_binary_content_encoding_image_url( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) blob_item = next( ( @@ -1718,19 +2675,22 @@ async def test_async_binary_content_encoding_image_url( ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_binary_content_encoding_mixed_content( reset_litellm_executor, sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1763,12 +2723,14 @@ def test_binary_content_encoding_mixed_content( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-4-vision-preview", messages=messages, @@ -1778,16 +2740,42 @@ def test_binary_content_encoding_mixed_content( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = 
chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1796,19 +2784,22 @@ def test_binary_content_encoding_mixed_content( assert any(item.get("type") == "blob" for item in content_items) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_mixed_content( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1841,12 +2832,14 @@ async def test_async_binary_content_encoding_mixed_content( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), 
start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-4-vision-preview", messages=messages, @@ -1857,16 +2850,44 @@ async def test_async_binary_content_encoding_mixed_content( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + if stream_gen_ai_spans: + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ item for msg in messages_data if "content" in msg for item in msg["content"] @@ -1875,19 +2896,22 @@ async def test_async_binary_content_encoding_mixed_content( assert any(item.get("type") == "blob" for item in content_items) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_binary_content_encoding_uri_type( reset_litellm_executor, sentry_init, + capture_events, capture_items, 
get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -1918,12 +2942,13 @@ def test_binary_content_encoding_uri_type( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): litellm.completion( model="gpt-4-vision-preview", messages=messages, @@ -1933,16 +2958,46 @@ def test_binary_content_encoding_uri_type( litellm_utils.executor.shutdown(wait=True) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + litellm.completion( + model="gpt-4-vision-preview", + 
messages=messages, + client=client, + custom_llm_provider="openai", + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( @@ -1958,19 +3013,22 @@ def test_binary_content_encoding_uri_type( assert uri_item["uri"] == "https://example.com/image.jpg" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio(loop_scope="session") async def test_async_binary_content_encoding_uri_type( sentry_init, + capture_events, capture_items, get_model_response, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") messages = [ { @@ -2001,12 +3059,14 @@ async def test_async_binary_content_encoding_uri_type( request_headers={"X-Stainless-Raw-Response": "true"}, ) - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): await litellm.acompletion( model="gpt-4-vision-preview", messages=messages, @@ -2017,16 +3077,47 @@ async def test_async_binary_content_encoding_uri_type( await GLOBAL_LOGGING_WORKER.flush() await asyncio.sleep(0.5) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = list( - x - for x in spans - if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - and 
x["attributes"]["sentry.origin"] == "auto.ai.litellm" - ) - assert len(chat_spans) == 1 - span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + spans = [item.payload for item in items if item.type == "span"] + chat_spans = list( + x + for x in spans + if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + and x["attributes"]["sentry.origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + + assert len(chat_spans) == 1 + span = chat_spans[0] + + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( ( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index d5e78bad99..c80b2df513 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -103,6 +103,7 @@ async def __call__(self, *args, **kwargs): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -113,17 +114,19 @@ async def __call__(self, *args, **kwargs): ) def test_nonstreaming_chat_completion_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( 
integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -140,52 +143,100 @@ def test_nonstreaming_chat_completion_no_prompts( ) ) - with start_transaction(name="openai tx"): - response = ( - client.chat.completions.create( - model="some-model", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "hello"}, - ], - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content ) - .choices[0] - .message.content - ) - assert response == "the model response" + assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 
+ assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -195,7 +246,7 @@ def test_nonstreaming_chat_completion_no_prompts( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -208,7 +259,7 @@ def test_nonstreaming_chat_completion_no_prompts( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -226,17 +277,19 @@ def test_nonstreaming_chat_completion_no_prompts( ) def test_nonstreaming_chat_completion( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post 
= mock.Mock( @@ -253,63 +306,131 @@ def test_nonstreaming_chat_completion( ) ) - with start_transaction(name="openai tx"): - response = ( - client.chat.completions.create( - model="some-model", - messages=messages, - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content ) - .choices[0] - .message.content - ) - assert response == "the model response" + assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == 
"openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -321,17 +442,19 @@ def test_nonstreaming_chat_completion( ) async def test_nonstreaming_chat_completion_async_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, nonstreaming_chat_completions_model_response, + 
stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock( @@ -348,50 +471,95 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ) ) - with start_transaction(name="openai tx"): - response = await client.chat.completions.create( - model="some-model", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "hello"}, - ], - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, - ) - response = response.choices[0].message.content + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = response.choices[0].message.content + + assert response == "the model response" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = response.choices[0].message.content - assert response == "the model response" + assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -401,7 +569,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -414,7 +582,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -432,17 +600,19 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ) async def test_nonstreaming_chat_completion_async( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], 
traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -459,58 +629,122 @@ async def test_nonstreaming_chat_completion_async( ) ) - with start_transaction(name="openai tx"): - response = await client.chat.completions.create( - model="some-model", - messages=messages, - max_tokens=100, - presence_penalty=0.1, - frequency_penalty=0.2, - temperature=0.7, - top_p=0.9, - ) - response = response.choices[0].message.content + if stream_gen_ai_spans: + items = capture_items("span") - assert response == "the model response" + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = response.choices[0].message.content - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert response == "the model response" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - 
param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.chat.completions.create( + model="some-model", + messages=get_messages(), + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response = 
response.choices[0].message.content + + assert response == "the model response" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 def tiktoken_encoding_if_installed(): @@ -523,6 +757,7 @@ def tiktoken_encoding_if_installed(): # noinspection PyTypeChecker 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -533,11 +768,13 @@ def tiktoken_encoding_if_installed(): ) def test_streaming_chat_completion_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -548,8 +785,8 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -599,12 +836,14 @@ def test_streaming_chat_completion_no_prompts( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[ @@ -622,53 +861,111 @@ def test_streaming_chat_completion_no_prompts( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" + assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + + assert response_string == "hello world" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert 
span["data"]["gen_ai.usage.total_tokens"] == 9 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", ) def test_streaming_chat_completion_with_usage_in_stream( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """When stream_options=include_usage is set, token usage comes from the final chunk's usage field.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -710,12 +1007,14 @@ def test_streaming_chat_completion_with_usage_in_stream( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -725,30 +1024,57 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert 
span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + for _ in response_stream: + pass + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", ) def test_streaming_chat_completion_empty_content_preserves_token_usage( sentry_init, + capture_events, capture_items, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): """Token usage from the stream is recorded even when no content is produced (e.g. 
content filter).""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -771,12 +1097,14 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -786,13 +1114,38 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["attributes"] - assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + for _ 
in response_stream: + pass + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["data"] + assert span["data"]["gen_ai.usage.total_tokens"] == 20 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", @@ -800,18 +1153,20 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( @pytest.mark.asyncio async def test_streaming_chat_completion_empty_content_preserves_token_usage_async( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """Token usage from the stream is recorded even when no content is produced - async variant.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -836,12 +1191,14 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -851,13 +1208,38 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass - span = next(item.payload for item in items if 
item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["attributes"] - assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 + else: + events = capture_events() + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + async for _ in response_stream: + pass + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["data"] + assert span["data"]["gen_ai.usage.total_tokens"] == 20 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.", @@ -865,18 +1247,20 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy @pytest.mark.asyncio async def test_streaming_chat_completion_async_with_usage_in_stream( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """When stream_options=include_usage is set, token usage comes from the final chunk's usage field (async).""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], 
traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -920,12 +1304,36 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "hello"}], + stream=True, + stream_options={"include_usage": True}, + ) + async for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "hello"}], @@ -935,19 +1343,22 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + tx = events[0] + assert tx["type"] 
== "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -957,7 +1368,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -970,7 +1381,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -988,11 +1399,13 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) def test_streaming_chat_completion( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1003,8 +1416,8 @@ def test_streaming_chat_completion( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -1054,15 +1467,17 @@ def test_streaming_chat_completion( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", - messages=messages, + messages=get_messages(), stream=True, max_tokens=100, presence_penalty=0.1, @@ 
-1073,61 +1488,145 @@ def test_streaming_chat_completion( response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert response_string == "hello world" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=get_messages(), + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, 
response_stream) + ) - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_string == "hello world" - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + param_id = request.node.callspec.id if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert "hello" in 
span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 12 + assert span["data"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1139,12 +1638,14 @@ def test_streaming_chat_completion( ) async def test_streaming_chat_completion_async_no_prompts( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1155,8 +1656,8 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -1208,12 +1709,14 @@ async def test_streaming_chat_completion_async_no_prompts( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( 
model="some-model", messages=[ @@ -1232,44 +1735,102 @@ async def test_streaming_chat_completion_async_no_prompts( async for x in response_stream: response_string += x.choices[0].delta.content - assert response_string == "hello world" + assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert 
SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "hello"}, + ], + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) + + response_string = "" + async for x in response_stream: + response_string += x.choices[0].delta.content + + assert response_string == "hello world" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + 
assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "messages", + "get_messages", [ pytest.param( - [ + lambda: [ { "role": "system", "content": "You are a helpful assistant.", @@ -1279,7 +1840,7 @@ async def test_streaming_chat_completion_async_no_prompts( id="blocks", ), pytest.param( - [ + lambda: [ { "role": "system", "content": [ @@ -1292,7 +1853,7 @@ async def test_streaming_chat_completion_async_no_prompts( id="parts", ), pytest.param( - iter( + lambda: iter( [ { "role": "system", @@ -1310,12 +1871,14 @@ async def test_streaming_chat_completion_async_no_prompts( ) async def test_streaming_chat_completion_async( sentry_init, + capture_events, capture_items, - messages, + get_messages, request, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -1326,8 +1889,8 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, 
+ _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1380,15 +1943,17 @@ async def test_streaming_chat_completion_async( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", - messages=messages, + messages=get_messages(), stream=True, max_tokens=100, presence_penalty=0.1, @@ -1401,127 +1966,299 @@ async def test_streaming_chat_completion_async( async for x in response_stream: response_string += x.choices[0].delta.content - assert response_string == "hello world" + assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == 
"some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - param_id = request.node.callspec.id - if "blocks" in param_id: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - } - ] + param_id = request.node.callspec.id + if "blocks" in param_id: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] + + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + else: + assert json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] + + assert "hello" 
in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ - { - "type": "text", - "content": "You are a helpful assistant.", - }, - { - "type": "text", - "content": "Be concise and clear.", - }, - ] + events = capture_events() - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=get_messages(), + stream=True, + max_tokens=100, + presence_penalty=0.1, + frequency_penalty=0.2, + temperature=0.7, + top_p=0.9, + ) - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + response_string = "" + async for x in response_stream: + response_string += x.choices[0].delta.content + + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert 
span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + param_id = request.node.callspec.id if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + } + ] else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import -def test_bad_chat_completion(sentry_init, capture_items): + if "blocks" in param_id: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 7 + assert span["data"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["data"]["gen_ai.usage.output_tokens"] == 2 + assert span["data"]["gen_ai.usage.input_tokens"] == 12 + 
assert span["data"]["gen_ai.usage.total_tokens"] == 14 + + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_bad_chat_completion( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - client.chat.completions.create( - model="some-model", - messages=[{"role": "system", "content": "hello"}], + if stream_gen_ai_spans: + items = capture_items("event") + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_status_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") - with start_transaction(name="test"): - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock( - side_effect=OpenAIError("API rate limit reached") - ) - with pytest.raises(OpenAIError): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="test"): + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + else: + events = capture_events() - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + with start_transaction(name="test"): + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_items): +async def test_bad_chat_completion_async( + sentry_init, + 
capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( side_effect=OpenAIError("API rate limit reached") ) - with pytest.raises(OpenAIError): - await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + if stream_gen_ai_spans: + items = capture_items("event") + + with pytest.raises(OpenAIError): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with pytest.raises(OpenAIError): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -1531,14 +2268,19 @@ async def test_bad_chat_completion_async(sentry_init, capture_items): ], ) def test_embeddings_create_no_pii( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") @@ -1552,59 +2294,88 @@ def test_embeddings_create_no_pii( ), ) - client.embeddings._post = mock.Mock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - 
response = client.embeddings.create( - input="hello", model="text-embedding-3-large" + client.embeddings._post = mock.Mock(return_value=returned_embedding) + + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" ) - assert len(response.data[0].embedding) == 3 + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert 
span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "input", + "get_input", [ pytest.param( - "hello", + lambda: "hello", id="string", ), pytest.param( - ["First text", "Second text", "Third text"], + lambda: ["First text", "Second text", "Third text"], id="string_sequence", ), pytest.param( - iter(["First text", "Second text", "Third text"]), + lambda: iter(["First text", "Second text", "Third text"]), id="string_iterable", ), pytest.param( - [5, 8, 13, 21, 34], + lambda: [5, 8, 13, 21, 34], id="tokens", ), pytest.param( - iter( + lambda: iter( [5, 8, 13, 21, 34], ), id="token_iterable", ), pytest.param( - [ + lambda: [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ], id="tokens_sequence", ), pytest.param( - iter( + lambda: iter( [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], @@ -1614,13 +2385,20 @@ def test_embeddings_create_no_pii( ), ], ) -def test_embeddings_create(sentry_init, capture_items, input, request): +def test_embeddings_create( + sentry_init, + capture_events, + capture_items, + get_input, + request, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") @@ -1635,45 +2413,111 @@ def test_embeddings_create(sentry_init, capture_items, input, request): ) client.embeddings._post = mock.Mock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - response = client.embeddings.create(input=input, model="text-embedding-3-large") - assert len(response.data[0].embedding) == 3 + if stream_gen_ai_spans: + items = capture_items("span") - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert 
span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) - param_id = request.node.callspec.id - if param_id == "string": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "hello" - ] - elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "First text", - "Second text", - "Third text", - ] - elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - 5, - 8, - 13, - 21, - 34, - ] + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" + ) + + param_id = request.node.callspec.id + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in param_id + ): + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] + + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - [5, 8, 13, 21, 34], - [8, 13, 21, 34, 55], - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + + param_id = request.node.callspec.id + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in param_id + ): + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1684,14 +2528,19 @@ def test_embeddings_create(sentry_init, capture_items, input, request): ], ) 
async def test_embeddings_create_async_no_pii( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1706,59 +2555,88 @@ async def test_embeddings_create_async_no_pii( ) client.embeddings._post = AsyncMock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - response = await client.embeddings.create( - input="hello", model="text-embedding-3-large" + + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" ) - assert len(response.data[0].embedding) == 3 + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + else: + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "input", + "get_input", [ pytest.param( - "hello", + lambda: "hello", id="string", ), pytest.param( - ["First text", "Second text", "Third text"], + lambda: ["First text", "Second text", "Third text"], id="string_sequence", ), pytest.param( - iter(["First text", "Second text", "Third text"]), + lambda: iter(["First text", "Second text", "Third text"]), id="string_iterable", ), pytest.param( - [5, 8, 13, 21, 34], + lambda: [5, 8, 13, 21, 34], id="tokens", ), pytest.param( - iter( + lambda: iter( [5, 8, 13, 21, 34], ), id="token_iterable", ), pytest.param( - [ + lambda: [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ], id="tokens_sequence", ), pytest.param( - iter( + lambda: iter( [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], @@ -1768,13 +2646,20 @@ async def test_embeddings_create_async_no_pii( ), ], ) -async def test_embeddings_create_async(sentry_init, capture_items, input, request): +async def test_embeddings_create_async( + sentry_init, + capture_events, + capture_items, + get_input, + request, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1789,60 +2674,131 @@ async def test_embeddings_create_async(sentry_init, capture_items, input, reques ) client.embeddings._post = AsyncMock(return_value=returned_embedding) - with start_transaction(name="openai tx"): - response = await client.embeddings.create( - input=input, model="text-embedding-3-large" - ) - assert len(response.data[0].embedding) == 3 + if stream_gen_ai_spans: + items = capture_items("span") - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) - param_id = request.node.callspec.id - if param_id == "string": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "hello" - ] - elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - "First text", - "Second text", - "Third text", - ] - elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - 5, - 8, - 13, - 21, - 34, - ] + assert len(response.data[0].embedding) == 3 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert ( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] + == "text-embedding-3-large" + ) + + param_id = request.node.callspec.id + + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in 
param_id + ): + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ - [5, 8, 13, 21, 34], - [8, 13, 21, 34, 55], - ] + events = capture_events() + + with start_transaction(name="openai tx"): + response = await client.embeddings.create( + input=get_input(), model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.embeddings" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + + param_id = request.node.callspec.id + + if ( + "string" in param_id + and "string_sequence" not in param_id + and "string_iterable" not in param_id + ): + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] + elif "string_sequence" in param_id or "string_iterable" in param_id: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "First text", + "Second text", + "Third text", + ] + elif ( + "tokens" in param_id or "token_iterable" in param_id + ) and "tokens_sequence" not in param_id: + assert 
json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + 5, + 8, + 13, + 21, + 34, + ] + else: + assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + [5, 8, 13, 21, 34], + [8, 13, 21, 34, 55], + ] - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) def test_embeddings_create_raises_error( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") client = OpenAI(api_key="z") @@ -1850,27 +2806,44 @@ def test_embeddings_create_raises_error( side_effect=OpenAIError("API rate limit reached") ) - with pytest.raises(OpenAIError): - client.embeddings.create(input="hello", model="text-embedding-3-large") + if stream_gen_ai_spans: + items = capture_items("event") + + with pytest.raises(OpenAIError): + client.embeddings.create(input="hello", model="text-embedding-3-large") + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with pytest.raises(OpenAIError): + client.embeddings.create(input="hello", model="text-embedding-3-large") + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( 
"send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) async def test_embeddings_create_raises_error_async( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event") client = AsyncOpenAI(api_key="z") @@ -1878,21 +2851,41 @@ async def test_embeddings_create_raises_error_async( side_effect=OpenAIError("API rate limit reached") ) - with pytest.raises(OpenAIError): - await client.embeddings.create(input="hello", model="text-embedding-3-large") + if stream_gen_ai_spans: + items = capture_items("event") + + with pytest.raises(OpenAIError): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + (event,) = (item.payload for item in items if item.type == "event") + else: + events = capture_events() + + with pytest.raises(OpenAIError): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + (event,) = events - (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin_nonstreaming_chat( - sentry_init, capture_items, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, + capture_items, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -1909,27 +2902,47 @@ def 
test_span_origin_nonstreaming_chat( ) ) - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_nonstreaming_chat_async( - sentry_init, capture_items, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, + capture_items, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -1946,24 +2959,45 @@ async def test_span_origin_nonstreaming_chat_async( ) ) - with 
start_transaction(name="openai tx"): - await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="openai tx"): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + events = capture_events() - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + with start_transaction(name="openai tx"): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" -def test_span_origin_streaming_chat(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin_streaming_chat( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -2003,6 +3037,11 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): ), ] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + 
client.chat.completions._post = mock.Mock(return_value=returned_stream) with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( @@ -2011,22 +3050,33 @@ def test_span_origin_streaming_chat(sentry_init, capture_items): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( - sentry_init, capture_items, async_iterator + sentry_init, + capture_events, + capture_items, + async_iterator, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -2071,6 +3121,12 @@ async def test_span_origin_streaming_chat_async( ) client.chat.completions._post = AsyncMock(return_value=returned_stream) + + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "system", 
"content": "hello"}] @@ -2080,19 +3136,31 @@ async def test_span_origin_streaming_chat_async( # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = (item.payload for item in items if item.type == "transaction") - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = (item.payload for item in items if item.type == "transaction") + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" -def test_span_origin_embeddings(sentry_init, capture_items): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_origin_embeddings( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") @@ -2107,23 +3175,41 @@ def test_span_origin_embeddings(sentry_init, capture_items): ) client.embeddings._post = mock.Mock(return_value=returned_embedding) + + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="openai tx"): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = [item.payload for item in items if item.type == "transaction"] - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = [item.payload for item in items if item.type == "transaction"] + assert event["contexts"]["trace"]["origin"] == "manual" 
+ + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_span_origin_embeddings_async(sentry_init, capture_items): +async def test_span_origin_embeddings_async( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") @@ -2138,14 +3224,26 @@ async def test_span_origin_embeddings_async(sentry_init, capture_items): ) client.embeddings._post = AsyncMock(return_value=returned_embedding) + + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() + with start_transaction(name="openai tx"): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = [item.payload for item in items if item.type == "transaction"] - assert event["contexts"]["trace"]["origin"] == "manual" + if stream_gen_ai_spans: + (event,) = [item.payload for item in items if item.type == "transaction"] + assert event["contexts"]["trace"]["origin"] == "manual" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + else: + (event,) = events - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" + assert event["contexts"]["trace"]["origin"] == "manual" + assert 
event["spans"][0]["origin"] == "auto.ai.openai" def test_completions_token_usage_from_response(): @@ -2510,61 +3608,111 @@ def count_tokens(msg): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): +def test_ai_client_span_responses_api_no_pii( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - client.responses.create( - model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", - max_output_tokens=100, - temperature=0.7, - top_p=0.9, - ) + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) - spans = [item.payload for item in items if item.type == "span"] - - assert len(spans) == 1 - assert spans[0]["attributes"] == { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": False, - "gen_ai.system": "openai", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - 
"gen_ai.usage.total_tokens": 30, - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert "gen_ai.system_instructions" not in spans[0]["attributes"] - assert "gen_ai.request.messages" not in spans[0]["attributes"] - assert "gen_ai.response.text" not in spans[0]["attributes"] + spans = [item.payload for item in items if item.type == "span"] + + assert len(spans) == 1 + assert spans[0]["attributes"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] + else: + events = capture_events() + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + 
max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert "gen_ai.system_instructions" not in spans[0]["data"] + assert "gen_ai.request.messages" not in spans[0]["data"] + assert "gen_ai.response.text" not in spans[0]["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions", ( @@ -2631,58 +3779,105 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api( - sentry_init, capture_items, instructions, input, request + sentry_init, + capture_events, + capture_items, + instructions, + input, + request, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - client.responses.create( - model="gpt-4o", - instructions=instructions, - input=input, - max_output_tokens=100, - temperature=0.7, - top_p=0.9, - ) + if 
stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + spans = [item.payload for item in items if item.type == "span"] + + assert len(spans) == 1 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) - spans = [item.payload for item in items if item.type == "span"] - - assert len(spans) == 1 - - expected_data = { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.system": "openai", - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": False, - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 
8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.text": "the model response", - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } param_id = request.node.callspec.id if "string" in param_id and ( @@ -2838,9 +4033,13 @@ def test_ai_client_span_responses_api( } ) - assert spans[0]["attributes"] == expected_data + if stream_gen_ai_spans: + assert spans[0]["attributes"] == expected_data + else: + assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "conversation, expected_id", [ @@ -2852,71 +4051,125 @@ def test_ai_client_span_responses_api( ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_responses_api_conversation_id( - sentry_init, capture_items, conversation, expected_id + sentry_init, + capture_events, + capture_items, + conversation, + 
expected_id, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - client.responses.create( - model="gpt-4o", - input="hello", - conversation=conversation, - ) + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + input="hello", + conversation=conversation, + ) - (span,) = (item.payload for item in items if item.type == "span") + (span,) = (item.payload for item in items if item.type == "span") - if expected_id is None: - assert "gen_ai.conversation.id" not in span["attributes"] + if expected_id is None: + assert "gen_ai.conversation.id" not in span["attributes"] + else: + assert span["attributes"]["gen_ai.conversation.id"] == expected_id else: - assert span["attributes"]["gen_ai.conversation.id"] == expected_id + events = capture_events() + + with start_transaction(name="openai tx"): + client.responses.create( + model="gpt-4o", + input="hello", + conversation=conversation, + ) + + (transaction,) = events + (span,) = transaction["spans"] + + if expected_id is None: + assert "gen_ai.conversation.id" not in span["data"] + else: + assert span["data"]["gen_ai.conversation.id"] == expected_id +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_items): +def test_error_in_responses_api( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = 
capture_items("event", "transaction", "span") client = OpenAI(api_key="z") client.responses._post = mock.Mock( side_effect=OpenAIError("API rate limit reached") ) - with start_transaction(name="openai tx"): - with pytest.raises(OpenAIError): + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): client.responses.create( model="gpt-4o", instructions="You are a coding assistant that talks like a pirate.", input="How do I check if a Python object is an instance of a class?", ) - # make sure the span where the error occurred is captured - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + # make sure the span where the error occurred is captured + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" - (error_event,) = (item.payload for item in items if item.type == "event") - assert error_event["level"] == "error" - assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) + else: + events = capture_events() + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + (error_event, transaction_event) = events + + assert transaction_event["type"] == "transaction" + # make sure the span where the error occurred is captured + assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + + assert 
error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - (transaction_event,) = ( - item.payload for item in items if item.type == "transaction" - ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @pytest.mark.parametrize( @@ -2984,59 +4237,107 @@ def test_error_in_responses_api(sentry_init, capture_items): ], ) async def test_ai_client_span_responses_async_api( - sentry_init, capture_items, instructions, input, request + sentry_init, + capture_events, + capture_items, + instructions, + input, + request, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) - with start_transaction(name="openai tx"): - await client.responses.create( - model="gpt-4o", - instructions=instructions, - input=input, - max_output_tokens=100, - temperature=0.7, - top_p=0.9, - ) + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + spans = [item.payload for item in items if item.type == "span"] + + assert len(spans) == 1 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', + "gen_ai.request.model": "gpt-4o", + 
"gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + events = capture_events() + + with start_transaction(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) - spans = [item.payload for item in items if item.type == "span"] - - assert len(spans) == 1 - - expected_data = { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": False, - "gen_ai.system": "openai", - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": "the model response", - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - 
"thread.name": mock.ANY, - } + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } param_id = request.node.callspec.id if "string" in param_id and ( @@ -3192,9 +4493,13 @@ async def test_ai_client_span_responses_async_api( } ) - assert spans[0]["attributes"] == expected_data + if stream_gen_ai_spans: + assert spans[0]["attributes"] == expected_data + else: + assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", @@ -3263,6 +4568,7 @@ async def test_ai_client_span_responses_async_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( sentry_init, + capture_events, capture_items, instructions, input, @@ -3270,25 +4576,28 @@ async def test_ai_client_span_streaming_responses_async_api( get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - 
items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): result = await client.responses.create( model="gpt-4o", instructions=instructions, @@ -3301,40 +4610,89 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass - spans = [item.payload for item in items if item.type == "span"] - spans = [ - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES - ] + spans = [item.payload for item in items if item.type == "span"] + spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + ] + + assert len(spans) == 1 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": True, + "gen_ai.system": "openai", + "gen_ai.response.time_to_first_token": mock.ANY, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "hello world", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "thread.id": mock.ANY, + 
"thread.name": mock.ANY, + } + else: + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + result = await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + stream=True, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + async for _ in result: + pass + + (transaction,) = events + spans = [ + span for span in transaction["spans"] if span["op"] == OP.GEN_AI_RESPONSES + ] - assert len(spans) == 1 - - expected_data = { - "gen_ai.operation.name": "responses", - "gen_ai.request.max_tokens": 100, - "gen_ai.request.temperature": 0.7, - "gen_ai.request.top_p": 0.9, - "gen_ai.response.model": "response-model-id", - "gen_ai.response.streaming": True, - "gen_ai.system": "openai", - "gen_ai.response.time_to_first_token": mock.ANY, - "gen_ai.usage.input_tokens": 20, - "gen_ai.usage.input_tokens.cached": 5, - "gen_ai.usage.output_tokens": 10, - "gen_ai.usage.output_tokens.reasoning": 8, - "gen_ai.usage.total_tokens": 30, - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.text": "hello world", - "sentry.environment": "production", - "sentry.op": "gen_ai.responses", - "sentry.origin": "auto.ai.openai", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "openai tx", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + assert len(spans) == 1 + assert spans[0]["origin"] == "auto.ai.openai" + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": True, + "gen_ai.system": "openai", + "gen_ai.response.time_to_first_token": mock.ANY, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + 
"gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "hello world", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } param_id = request.node.callspec.id if "string" in param_id and ( @@ -3490,43 +4848,74 @@ async def test_ai_client_span_streaming_responses_async_api( } ) - assert spans[0]["attributes"] == expected_data + if stream_gen_ai_spans: + assert spans[0]["attributes"] == expected_data + else: + assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_error_in_responses_async_api(sentry_init, capture_items): +async def test_error_in_responses_async_api( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock( side_effect=OpenAIError("API rate limit reached") ) - with start_transaction(name="openai tx"): - with pytest.raises(OpenAIError): + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): await client.responses.create( model="gpt-4o", instructions="You are a coding assistant that talks like a pirate.", input="How do I check if a Python object is an instance of a class?", ) - # make sure the span where the error occurred is captured - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + # make sure the span where the error occurred is captured + spans = [item.payload for 
item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" - (error_event,) = (item.payload for item in items if item.type == "event") - assert error_event["level"] == "error" - assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) + else: + events = capture_events() + + with start_transaction(name="openai tx"), pytest.raises(OpenAIError): + await client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + (error_event, transaction_event) = events + + assert transaction_event["type"] == "transaction" + # make sure the span where the error occurred is captured + assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - (transaction_event,) = ( - item.payload for item in items if item.type == "transaction" - ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -3606,6 +4995,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): ] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], @@ -3613,11 +5003,13 @@ async def test_error_in_responses_async_api(sentry_init, capture_items): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api( sentry_init, + capture_events, capture_items, send_default_pii, 
include_prompts, get_model_response, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -3627,8 +5019,8 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3637,12 +5029,14 @@ def test_streaming_responses_api( ) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.responses.create( model="some-model", input="hello", @@ -3657,29 +5051,74 @@ def test_streaming_responses_api( if hasattr(item, "delta"): response_string += item.delta - assert response_string == "hello world" + assert response_string == "hello world" + + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - (span,) = (item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" - assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - if send_default_pii and include_prompts: - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.responses.create( + model="some-model", + input="hello", + stream=True, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + response_string = "" + for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert 
span["attributes"]["gen_ai.usage.total_tokens"] == 30 + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -3688,12 +5127,14 @@ def test_streaming_responses_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( sentry_init, + capture_events, capture_items, send_default_pii, include_prompts, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): sentry_init( integrations=[ @@ -3703,20 +5144,22 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.responses.create( model="some-model", input="hello", @@ -3731,29 +5174,74 @@ async def test_streaming_responses_api_async( if hasattr(item, "delta"): response_string += 
item.delta - assert response_string == "hello world" + assert response_string == "hello world" - (span,) = (item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" - assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" - if send_default_pii and include_prompts: - assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + if send_default_pii and include_prompts: + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + events = capture_events() + + with mock.patch.object( + 
client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.responses.create( + model="some-model", + input="hello", + stream=True, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + response_string = "" + async for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 - assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), reason="OpenAI versions <=1.1.0 do not support the tools parameter.", @@ -3763,13 +5251,18 @@ async def test_streaming_responses_api_async( [[], None, NOT_GIVEN, omit], ) def test_empty_tools_in_chat_completion( - sentry_init, capture_items, tools, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, 
+ capture_items, + tools, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -3786,19 +5279,37 @@ def test_empty_tools_in_chat_completion( ) ) - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=[{"role": "system", "content": "hello"}], - tools=tools, - ) + if stream_gen_ai_spans: + items = capture_items("span") - span = next(item.payload for item in items if item.type == "span") + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + tools=tools, + ) + + span = next(item.payload for item in items if item.type == "span") + + assert "gen_ai.request.available_tools" not in span["attributes"] + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + tools=tools, + ) - assert "gen_ai.request.available_tools" not in span["attributes"] + (event,) = events + span = event["spans"][0] + + assert "gen_ai.request.available_tools" not in span["data"] # Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "test_message,expected_role", [ @@ -3815,10 +5326,12 @@ def test_empty_tools_in_chat_completion( ) def test_openai_message_role_mapping( sentry_init, + capture_events, capture_items, test_message, expected_role, nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3826,8 +5339,8 @@ def 
test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -3846,32 +5359,53 @@ def test_openai_message_role_mapping( test_messages = [test_message] - with start_transaction(name="openai tx"): - client.chat.completions.create(model="test-model", messages=test_messages) - # Verify that the span was created correctly - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + if stream_gen_ai_spans: + items = capture_items("span") + + with start_transaction(name="openai tx"): + client.chat.completions.create(model="test-model", messages=test_messages) - # Parse the stored messages - import json + # Verify that the span was created correctly + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) + else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.chat.completions.create(model="test-model", messages=test_messages) + + # Verify that the span was created correctly + (event,) = events + span = event["spans"][0] + assert span["op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == expected_role +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def 
test_openai_message_truncation( - sentry_init, capture_items, nonstreaming_chat_completions_model_response + sentry_init, + capture_events, + capture_items, + nonstreaming_chat_completions_model_response, + stream_gen_ai_spans, ): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -3898,23 +5432,48 @@ def test_openai_message_truncation( {"role": "user", "content": large_content}, ] - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) - span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with start_transaction(name="openai tx"): + 
client.chat.completions.create( + model="some-model", + messages=large_messages, + ) + + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) - (event,) = (item.payload for item in items if item.type == "transaction") meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -3922,8 +5481,14 @@ def test_openai_message_truncation( # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_streaming_chat_completion_ttft( - sentry_init, capture_items, get_model_response, server_side_event_chunks + sentry_init, + capture_events, + capture_items, + get_model_response, + server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that streaming chat completions capture time-to-first-token (TTFT). 
@@ -3931,8 +5496,8 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3969,12 +5534,37 @@ def test_streaming_chat_completion_ttft( ), ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "Say hello"}], + stream=True, + ) + # Consume the stream + for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "Say hello"}], @@ -3984,24 +5574,29 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = 
span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that async streaming chat completions capture time-to-first-token (TTFT). @@ -4009,8 +5604,8 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -4049,12 +5644,37 @@ async def test_streaming_chat_completion_ttft_async( ) ) - with mock.patch.object( - client.chat._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "Say hello"}], + stream=True, + ) + # Consume the stream + async for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with 
mock.patch.object( + client.chat._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( model="some-model", messages=[{"role": "user", "content": "Say hello"}], @@ -4064,20 +5684,28 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.chat" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( - sentry_init, capture_items, get_model_response, server_side_event_chunks + sentry_init, + capture_events, + capture_items, + get_model_response, + server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that streaming responses API captures time-to-first-token (TTFT). 
@@ -4085,20 +5713,45 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( server_side_event_chunks(EXAMPLE_RESPONSES_STREAM) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + # Consume the stream + for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = client.responses.create( model="some-model", input="hello", @@ -4108,25 +5761,30 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert 
SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 # noinspection PyTypeChecker +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_ttft_async( sentry_init, + capture_events, capture_items, get_model_response, async_iterator, server_side_event_chunks, + stream_gen_ai_spans, ): """ Test that async streaming responses API captures time-to-first-token (TTFT). @@ -4134,20 +5792,45 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - with mock.patch.object( - client.responses._client._client, - "send", - return_value=returned_stream, - ): - with start_transaction(name="openai tx"): + if stream_gen_ai_spans: + items = capture_items("span") + + with mock.patch.object( + client.responses._client._client, + "send", + return_value=returned_stream, + ), start_transaction(name="openai tx"): + response_stream = await client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + # Consume the stream + async for _ in response_stream: + pass + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + else: + events = capture_events() + + with mock.patch.object( + client.responses._client._client, + "send", + 
return_value=returned_stream, + ), start_transaction(name="openai tx"): response_stream = await client.responses.create( model="some-model", input="hello", @@ -4157,11 +5840,13 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass - span = next(item.payload for item in items if item.type == "span") - assert span["attributes"]["sentry.op"] == "gen_ai.responses" + (tx,) = events + span = tx["spans"][0] + assert span["op"] == "gen_ai.responses" + + # Verify TTFT is captured + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] + ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] - # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] - ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index bde222274c..46196893d8 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -157,13 +157,16 @@ def test_agent_custom_model(): ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) @@ -173,64 +176,125 @@ async def test_agent_invocation_span_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, + if stream_gen_ai_spans: + with patch.object( + 
agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + (transaction,) = (item.payload for item in items if item.type == "transaction") + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - items = capture_items("span", "transaction") + assert invoke_agent_span["name"] == "invoke_agent test_agent" + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert 
ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] - assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] - assert "gen_ai.response.text" not in invoke_agent_span["attributes"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert "gen_ai.request.messages" not in invoke_agent_span["data"] + assert "gen_ai.response.text" not in invoke_agent_span["data"] - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 
1.0 + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", @@ -309,6 +373,7 @@ async def test_agent_invocation_span_no_pii( ) async def test_agent_invocation_span( sentry_init, + capture_events, capture_items, test_agent_with_instructions, nonstreaming_responses_model_response, @@ -316,6 +381,7 @@ async def test_agent_invocation_span( input, request, 
get_model_response, + stream_gen_ai_spans, ): """ Test that the integration creates spans for agent invocations. @@ -328,176 +394,360 @@ async def test_agent_invocation_span( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, - input, - run_config=test_run_config, - ) + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span, ai_client_span = spans + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" # Only first case checks "gen_ai.request.messages" until further input handling work. 
param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + if stream_gen_ai_spans: + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - assert invoke_agent_span["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, - ] - ) + assert invoke_agent_span["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ] + ) + else: + assert "gen_ai.system_instructions" not in ai_client_span["data"] + + assert invoke_agent_span["data"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ] + ) elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + 
else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + 
"gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + 
"gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + if stream_gen_ai_spans: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + if stream_gen_ai_spans: + assert ( + invoke_agent_span["attributes"]["gen_ai.response.text"] + == "Hello, how can I help you?" 
) else: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + assert ( + invoke_agent_span["data"]["gen_ai.response.text"] + == "Hello, how can I help you?" ) - assert ( - invoke_agent_span["attributes"]["gen_ai.response.text"] - == "Hello, how can I help you?" - ) - - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - + if stream_gen_ai_spans: + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert 
invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + else: + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_client_span_custom_model( sentry_init, + capture_events, capture_items, test_agent_custom_model, 
nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that the integration uses the correct model name if a custom model is used. @@ -511,40 +761,78 @@ async def test_client_span_custom_model( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span") + items = capture_items("span") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["name"] == "chat my-custom-model" - assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["name"] == "chat my-custom-model" + assert ( + ai_client_span["attributes"]["gen_ai.request.model"] + == "my-custom-model" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + assert ai_client_span["description"] == "chat my-custom-model" + assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_agent_invocation_span_sync_no_pii( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that the integration creates spans for agent invocations. 
@@ -557,59 +845,127 @@ def test_agent_invocation_span_sync_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config) + result = agents.Runner.run_sync( + agent, "Test input", run_config=test_run_config + ) - assert result is not None - assert result.final_output == "Hello, how can I help you?" + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] + == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert 
invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + not in invoke_agent_span["attributes"] + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = agents.Runner.run_sync( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert 
ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions", ( @@ -659,202 +1015,362 @@ def test_agent_invocation_span_sync_no_pii( ], }, { - "role": "user", - "content": "Test input", - }, - ], - id="parts_no_type", - ), - pytest.param( - [ - { - "type": "message", - "role": "system", - "content": [ - {"type": "text", "text": "You are a helpful assistant."}, - {"type": "text", "text": "Be concise and clear."}, - ], - }, - { - "type": "message", - "role": "user", - "content": "Test input", - }, - ], - id="parts", - ), - ], -) -def test_agent_invocation_span_sync( - sentry_init, - capture_items, - test_agent_with_instructions, - nonstreaming_responses_model_response, - instructions, - input, - request, - get_model_response, -): - """ - Test that the integration creates spans for agent invocations. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent_with_instructions(instructions).clone(model=model) - - response = get_model_response( - nonstreaming_responses_model_response, serialize_pydantic=True - ) - - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) - - items = capture_items("span", "transaction") - - result = agents.Runner.run_sync( - agent, - input, - run_config=test_run_config, - ) - - assert result is not None - assert result.final_output == "Hello, how can I help you?" 
- - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span, ai_client_span = spans - - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert 
ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "role": "user", + "content": "Test input", }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( + ], + id="parts_no_type", + ), + pytest.param( [ { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - 
"gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - else: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "type": "message", + "role": "user", + "content": "Test input", }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] + ], + id="parts", + ), + ], +) +def test_agent_invocation_span_sync( + sentry_init, + capture_events, + capture_items, + test_agent_with_instructions, + nonstreaming_responses_model_response, + instructions, + input, + request, + get_model_response, + stream_gen_ai_spans, +): + """ + Test that the integration creates spans for agent invocations. + """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent_with_instructions(instructions).clone(model=model) + + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) + + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + elif "string" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert 
ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks_no_type" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif "parts_no_type" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( 
+ [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert 
ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["data"] + elif "string" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks_no_type" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif "parts_no_type" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a 
pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_handoff_span(sentry_init, capture_items, get_model_response): +async def test_handoff_span( + sentry_init, + capture_events, + capture_items, + get_model_response, + stream_gen_ai_spans, +): """ Test that handoff spans are created when agents hand off to other agents. 
""" @@ -947,42 +1463,85 @@ async def test_handoff_span(sentry_init, capture_items, get_model_response): serialize_pydantic=True, ) - with patch.object( - primary_agent.model._client._client, - "send", - side_effect=[handoff_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("transaction", "span") + items = capture_items("transaction", "span") - result = await agents.Runner.run( - primary_agent, - "Please hand off to secondary agent", - run_config=test_run_config, - ) + result = await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["name"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + else: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await 
agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_max_turns_before_handoff_span( - sentry_init, capture_items, get_model_response + sentry_init, + capture_events, + capture_items, + get_model_response, + stream_gen_ai_spans, ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. 
@@ -1076,46 +1635,87 @@ async def test_max_turns_before_handoff_span( serialize_pydantic=True, ) - with patch.object( - primary_agent.model._client._client, - "send", - side_effect=[handoff_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("transaction", "span") + items = capture_items("transaction", "span") - with pytest.raises(MaxTurnsExceeded): - await agents.Runner.run( - primary_agent, - "Please hand off to secondary agent", - run_config=test_run_config, - max_turns=1, + with pytest.raises(MaxTurnsExceeded): + await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + max_turns=1, + ) + + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF ) - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["name"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + else: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + with 
pytest.raises(MaxTurnsExceeded): + await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + max_turns=1, + ) - # Verify handoff span was created - assert handoff_span is not None - assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + (error, transaction) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, + capture_events, capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test tool execution span creation. 
@@ -1182,9 +1782,13 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + else: + events = capture_events() await agents.Runner.run( agent_with_tool, @@ -1192,24 +1796,35 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + if stream_gen_ai_spans: + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - spans = [item.payload for item in items if item.type == "span"] - agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) - tool_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL - ) + spans = [item.payload for item in items if item.type == "span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + ) + else: + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, 
ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) available_tool = { "name": "simple_test_tool", @@ -1249,53 +1864,107 @@ def simple_test_tool(message: str) -> str: } ) - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + if stream_gen_ai_spans: + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - agent_span_available_tool = json.loads( - agent_span["attributes"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["attributes"]["gen_ai.system"] == "openai" + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert ai_client_span1["name"] == "chat gpt-4" - assert 
ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - ai_client_span1_available_tool = json.loads( - ai_client_span1["attributes"]["gen_ai.request.available_tools"] - )[0] + if stream_gen_ai_spans: + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] assert all( 
ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["attributes"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + if stream_gen_ai_spans: + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + else: + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert 
ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 tool_call = { "arguments": '{"message": "hello"}', @@ -1309,67 +1978,135 @@ def simple_test_tool(message: str) -> str: if OPENAI_VERSION >= (2, 25, 0): tool_call["namespace"] = None - assert json.loads(ai_client_span1["attributes"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] + if stream_gen_ai_spans: + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] + else: + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] + + if stream_gen_ai_spans: + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["name"] == "execute_tool simple_test_tool" - assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert 
tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - tool_span_available_tool = json.loads( - tool_span["attributes"]["gen_ai.request.available_tools"] - )[0] + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["attributes"]["gen_ai.system"] == "openai" - assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["name"] == "chat gpt-4" - assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] - )[0] + if stream_gen_ai_spans: + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert ( + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed 
with: hello" + ) + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + else: + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] assert all( ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["attributes"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - 
assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + if stream_gen_ai_spans: + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + else: + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + 
assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio @@ -1626,8 +2363,15 @@ async def test_hosted_mcp_tool_propagation_headers( assert hosted_mcp_tool["headers"]["baggage"] == expected_outgoing_baggage +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_model_behavior_error(sentry_init, capture_items, test_agent): +async def test_model_behavior_error( + sentry_init, + capture_events, + capture_items, + test_agent, + stream_gen_ai_spans, +): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. The mocked API response indicates that "wrong_tool" was called. 
@@ -1641,63 +2385,129 @@ def simple_test_tool(message: str) -> str: # Create agent with the tool agent_with_tool = test_agent.clone(tools=[simple_test_tool]) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a mock response that includes tool calls - tool_call = ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="wrong_tool", - type="function_call", - arguments='{"message": "hello"}', - ) - - tool_response = ModelResponse( - output=[tool_call], - usage=Usage( - requests=1, input_tokens=10, output_tokens=5, total_tokens=15 - ), - response_id="resp_tool_123", - ) + if stream_gen_ai_spans: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) - mock_get_response.side_effect = [tool_response] + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = ( + item.payload for item in items if item.type == "transaction" ) + 
assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - items = capture_items("span", "transaction") + spans = [item.payload for item in items if item.type == "span"] - with pytest.raises(ModelBehaviorError): - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, + ( + agent_span, + ai_client_span1, + ) = spans + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "error" + else: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - spans = [item.payload for item in items if item.type == "span"] - ( - agent_span, - ai_client_span1, - ) = spans + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + 
run_config=test_run_config, + ) + + (error, transaction) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + ) = spans - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - # Error due to unrecognized tool in model response. - assert agent_span["status"] == "error" + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "internal_error" + assert agent_span["tags"]["status"] == "internal_error" + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_items, test_agent): +async def test_error_handling( + sentry_init, + capture_events, + capture_items, + test_agent, + stream_gen_ai_spans, +): """ Test error handling in agent execution. 
""" @@ -1714,37 +2524,100 @@ async def test_error_handling(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "span", "transaction") + if stream_gen_ai_spans: + items = capture_items("event", "span", "transaction") - with pytest.raises(Exception, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error_event,) = ( + item.payload for item in items if item.type == "event" + ) + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) + + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) + + assert transaction["transaction"] == "test_agent workflow" + assert ( + transaction["contexts"]["trace"]["origin"] + == "auto.ai.openai_agents" + ) + + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["sentry.origin"] + == "auto.ai.openai_agents" ) - (error_event,) = (item.payload for item in items if item.type == "event") - assert error_event["exception"]["values"][0]["type"] == "Exception" - assert error_event["exception"]["values"][0]["value"] == "Model Error" - assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" + assert ai_client_span["name"] == "chat gpt-4" + assert ( + ai_client_span["attributes"]["sentry.origin"] + == "auto.ai.openai_agents" + ) + assert ai_client_span["status"] == "error" + else: + 
events = capture_events() + + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + spans = transaction["spans"] + (invoke_agent_span, ai_client_span) = spans - spans = [item.payload for item in items if item.type == "span"] - (invoke_agent_span, ai_client_span) = spans + assert transaction["transaction"] == "test_agent workflow" + assert ( + transaction["contexts"]["trace"]["origin"] + == "auto.ai.openai_agents" + ) - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "error" + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "internal_error" + assert ai_client_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_error_captures_input_data(sentry_init, capture_items, test_agent): +async def test_error_captures_input_data( + sentry_init, + capture_events, + capture_items, + test_agent, + 
stream_gen_ai_spans, +): """ Test that input data is captured even when the API call raises an exception. This verifies that _set_input_data is called before the API call. @@ -1775,36 +2648,69 @@ async def test_error_captures_input_data(sentry_init, capture_items, test_agent) ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "span") + if stream_gen_ai_spans: + items = capture_items("event", "span") + else: + events = capture_events() with pytest.raises(InternalServerError, match="Error code: 500"): await agents.Runner.run(agent, "Test input", run_config=test_run_config) - (error_event,) = (item.payload for item in items if item.type == "event") + if stream_gen_ai_spans: + (error_event,) = (item.payload for item in items if item.type == "event") + else: + ( + error_event, + transaction, + ) = events + assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ][0] + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ][0] + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["status"] == "error" + + assert "gen_ai.request.messages" in ai_client_span["attributes"] + else: + spans = transaction["spans"] + ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["status"] == "error" + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["status"] == "internal_error" + assert ai_client_span["tags"]["status"] == "internal_error" - assert "gen_ai.request.messages" 
in ai_client_span["attributes"] + assert "gen_ai.request.messages" in ai_client_span["data"] request_messages = safe_serialize( [ {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, ] ) - assert ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + if stream_gen_ai_spans: + assert ( + ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + ) + else: + assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_span_status_error(sentry_init, capture_items, test_agent): +async def test_span_status_error( + sentry_init, + capture_events, + capture_items, + test_agent, + stream_gen_ai_spans, +): with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): with patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -1817,28 +2723,51 @@ async def test_span_status_error(sentry_init, capture_items, test_agent): LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - with pytest.raises(ValueError, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = ( + item.payload for item in items if item.type == "transaction" ) + else: + events = capture_events() - (error,) = (item.payload for item in items if item.type == "event") - assert 
error["level"] == "error" + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" - (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_mcp_tool_execution_spans( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1930,45 +2859,88 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) + spans = [item.payload for item in items if item.type == "span"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.input"] + == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) + + # Verify no error status since error was None + assert mcp_tool_span.get("status") 
!= "error" + assert mcp_tool_span.get("tags", {}).get("status") != "error" + else: + events = capture_events() - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "error" - assert mcp_tool_span.get("tags", {}).get("status") != "error" + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert ( + mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["data"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "internal_error" + assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_mcp_tool_execution_with_error( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that MCP tool calls with errors are tracked with error status. 
@@ -2060,9 +3032,13 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + else: + events = capture_events() await agents.Runner.run( agent, @@ -2070,29 +3046,57 @@ async def test_mcp_tool_execution_with_error( run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + spans = [item.payload for item in items if item.type == "span"] + + # Find the MCP execute_tool span with error + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool failing_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created with error status + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" + + # Verify error status was set + assert mcp_tool_span["status"] == "error" + else: + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span with error - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool failing_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span with error + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool failing_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created with error status - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" - assert 
mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" + # Verify the MCP tool span was created with error status + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["data"]["gen_ai.tool.output"] is None - # Verify error status was set - assert mcp_tool_span["status"] == "error" + # Verify error status was set + assert mcp_tool_span["status"] == "internal_error" + assert mcp_tool_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_mcp_tool_execution_without_pii( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that MCP tool input/output are not included when send_default_pii is False. 
@@ -2184,42 +3188,74 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + spans = [item.payload for item in items if item.type == "span"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] + else: + events = capture_events() + + await agents.Runner.run( + agent, + "Please use MCP tool", + run_config=test_run_config, + ) - spans = [item.payload for item in items if item.type == "span"] + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not 
created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] - assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["data"] + assert "gen_ai.tool.output" not in mcp_tool_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that multiple agents can be run at the same time in asyncio tasks @@ -2241,10 +3277,9 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") - async def run(): await agents.Runner.run( starting_agent=agent, @@ -2252,12 +3287,31 @@ async def run(): run_config=test_run_config, ) - await asyncio.gather(*[run() for _ in range(3)]) + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + + await asyncio.gather(*[run() for _ in range(3)]) + + txn1, txn2, txn3 = ( + item.payload for item in items if item.type == "transaction" + ) + + assert txn1["transaction"] == "test_agent workflow" + assert txn2["transaction"] == "test_agent workflow" + else: + events = capture_events() + + await 
asyncio.gather(*[run() for _ in range(3)]) - txn1, txn2, txn3 = (item.payload for item in items if item.type == "transaction") + assert len(events) == 3 + txn1, txn2, txn3 = events + + assert txn1["type"] == "transaction" + assert txn1["transaction"] == "test_agent workflow" + assert txn2["type"] == "transaction" + assert txn2["transaction"] == "test_agent workflow" + assert txn3["type"] == "transaction" - assert txn1["transaction"] == "test_agent workflow" - assert txn2["transaction"] == "test_agent workflow" assert txn3["transaction"] == "test_agent workflow" @@ -2303,13 +3357,16 @@ def test_openai_agents_message_role_mapping( assert stored_messages[0]["role"] == expected_role +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_error_tracing( sentry_init, + capture_events, capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test that tool execution errors are properly tracked via error tracing patch. 
@@ -2383,46 +3440,84 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - # Note: The agents library catches tool exceptions internally, - # so we don't expect this to raise - await agents.Runner.run( - agent_with_tool, - "Please use the failing tool", - run_config=test_run_config, - ) + # Note: The agents library catches tool exceptions internally, + # so we don't expect this to raise + await agents.Runner.run( + agent_with_tool, + "Please use the failing tool", + run_config=test_run_config, + ) + + spans = [item.payload for item in items if item.type == "span"] + + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("name", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break + + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" + + # Verify error status was set (this is the key test for our patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "error" + else: + events = capture_events() + + # Note: The agents library catches tool exceptions internally, + # so we don't expect this to raise + await agents.Runner.run( + agent_with_tool, + "Please use the failing tool", + run_config=test_run_config, + ) - spans = [item.payload for item in items if item.type == "span"] + (transaction,) = events + spans = transaction["spans"] - # Find the execute_tool span - execute_tool_span = None - for span in spans: - 
description = span.get("name", "") - if description is not None and description.startswith( - "execute_tool failing_tool" - ): - execute_tool_span = span - break + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("description", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break - # Verify the execute_tool span was created - assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["name"] == "execute_tool failing_tool" - assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["description"] == "execute_tool failing_tool" + assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" - # Verify error status was set (this is the key test for our patch) - # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "error" + # Verify error status was set (this is the key test for our patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "internal_error" + assert execute_tool_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. 
@@ -2480,42 +3575,80 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + assert result is not None - assert result is not None + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) + # Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + ) + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] + == 5 + ) + else: + events = capture_events() - # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert 
"gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 + # Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ( + invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that ai_client spans (gen_ai.chat) include the response model from the actual API response. 
@@ -2573,32 +3706,63 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + # Verify ai_client span has response model from API response + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - # Verify ai_client span has response model from API response - assert ai_client_span["name"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + assert result is not None + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify ai_client span has response model from API response + assert ai_client_span["description"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + 
ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( sentry_init, + capture_events, capture_items, get_model_response, + stream_gen_ai_spans, ): """ Test that response model is captured when using ChatCompletions API (not Responses API). @@ -2661,32 +3825,63 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span", "transaction") + if stream_gen_ai_spans: + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4o-mini-2024-07-18" - ) + # Verify response model from API response is captured + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) + else: + events = capture_events() + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + assert result is not None + (transaction,) = events + spans = 
transaction["spans"] + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify response model from API response is captured + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + ai_client_span["data"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -2774,47 +3969,91 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_call_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_call_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + items = capture_items("span", "transaction") + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) - items = capture_items("span", "transaction") + assert result is not None - result = await agents.Runner.run( - agent_with_tool, - "What is 5 + 3?", - run_config=test_run_config, + spans = [item.payload for item in items if item.type == "span"] + + invoke_agent_span = spans[0] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert 
invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + # Cached tokens should be aggregated: 0 + 5 = 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 + # Reasoning tokens should be aggregated: 0 + 3 = 3 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 ) + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_call_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - assert result is not None + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = spans[0] + (transaction,) = events + spans = transaction["spans"] - # Verify invoke_agent span has aggregated usage from both API calls - # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 - # Cached tokens should be aggregated: 0 + 5 = 5 - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 - # Reasoning tokens should be aggregated: 0 + 3 = 3 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 + invoke_agent_span = spans[0] + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 
total + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 50 + # Cached tokens should be aggregated: 0 + 5 = 5 + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 5 + # Reasoning tokens should be aggregated: 0 + 3 = 3 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that invoke_agent spans include the response model from the API response. @@ -2862,53 +4101,106 @@ async def test_invoke_agent_span_includes_response_model( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify invoke_agent span has response model from API - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + # Verify invoke_agent span has response model from API + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # 
Verify invoke_agent span has response model from API + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( sentry_init, + capture_events, capture_items, test_agent, get_model_response, + stream_gen_ai_spans, ): """ Test that when an agent makes multiple LLM calls (e.g., with tools), @@ -2996,44 +4288,93 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[first_response, second_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[first_response, second_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent_with_tool, - "What is 5 + 3?", - run_config=test_run_config, - ) + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = spans[0] - first_ai_client_span = spans[1] - second_ai_client_span = spans[3] # After 
tool span + spans = [item.payload for item in items if item.type == "span"] - # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span - # Each ai_client span has its own response model from the API - assert first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" - assert ( - second_ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Each ai_client span has its own response model from the API + assert ( + first_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4-0613" + ) + assert ( + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[first_response, second_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span + + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] + == 
"gpt-4.1-2025-04-14" + ) + + # Each ai_client span has its own response model from the API + assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert ( + second_ai_client_span["data"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) def test_openai_agents_message_truncation(sentry_init, capture_items): @@ -3280,6 +4621,7 @@ async def test_streaming_ttft_on_chat_span( assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), reason="conversation_id support requires openai-agents >= 0.4.0", @@ -3287,10 +4629,12 @@ async def test_streaming_ttft_on_chat_span( @pytest.mark.asyncio async def test_conversation_id_on_all_spans( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). 
@@ -3304,58 +4648,121 @@ async def test_conversation_id_on_all_spans( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - result = await agents.Runner.run( - agent, - "Test input", - run_config=test_run_config, - conversation_id="conv_test_123", - ) + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify workflow span (transaction) has conversation_id - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify workflow span (transaction) has conversation_id + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) + + 
assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["attributes"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify ai_client span has conversation_id + assert ( + ai_client_span["attributes"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) - # Verify invoke_agent span has conversation_id - assert invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + assert result is not None - # Verify ai_client span has conversation_id - assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify workflow span (transaction) has conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + ) + + # Verify ai_client span has conversation_id + assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), reason="conversation_id support requires openai-agents >= 0.4.0", ) @pytest.mark.asyncio async 
def test_conversation_id_on_tool_span( - sentry_init, capture_items, test_agent, get_model_response + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + stream_gen_ai_spans, ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -3442,45 +4849,91 @@ def simple_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - await agents.Runner.run( - agent_with_tool, - "Use the tool", - run_config=test_run_config, - conversation_id="conv_tool_test_456", + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + spans = [item.payload for item in items if item.type == "span"] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("name", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span (transaction) should have the same conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Workflow span (transaction) should have the same conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == 
"conv_tool_test_456" ) + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - spans = [item.payload for item in items if item.type == "span"] - # Find the tool span - tool_span = None - for span in spans: - if span.get("name", "").startswith("execute_tool"): - tool_span = span - break - - assert tool_span is not None - # Tool span should have the conversation_id passed to Runner.run() - assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" - - # Workflow span (transaction) should have the same conversation_id - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_tool_test_456" - ) + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("description", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span (transaction) should have the same conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_tool_test_456" + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), reason="conversation_id support requires openai-agents >= 0.4.0", @@ -3488,10 +4941,12 @@ def simple_tool(message: str) -> str: @pytest.mark.asyncio async 
def test_no_conversation_id_when_not_provided( sentry_init, + capture_events, capture_items, test_agent, nonstreaming_responses_model_response, get_model_response, + stream_gen_ai_spans, ): """ Test that gen_ai.conversation.id is not set when not passed to Runner.run(). @@ -3505,40 +4960,84 @@ async def test_no_conversation_id_when_not_provided( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - items = capture_items("span", "transaction") + items = capture_items("span", "transaction") - # Don't pass conversation_id - result = await agents.Runner.run( - agent, "Test input", run_config=test_run_config - ) + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) - assert result is not None + assert result is not None - (transaction,) = (item.payload for item in items if item.type == "transaction") + (transaction,) = ( + item.payload for item in items if item.type == "transaction" + ) - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span + for span in spans + if 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify conversation_id is NOT set on any spans - assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "attributes", {} - ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "attributes", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get( + "attributes", {} + ) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "data", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index cfb1ca09ca..d60058e4ce 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -52,8 +52,15 @@ def inner(): return inner 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_async(sentry_init, capture_items, get_test_agent): +async def test_agent_run_async( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the integration creates spans for async agent runs. """ @@ -61,51 +68,91 @@ async def test_agent_run_async(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - result = await test_agent.run("Test input") - assert result is not None - assert result.output is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - (transaction,) = (item.payload for item in items if item.type == "transaction") + result = await test_agent.run("Test input") - # Verify transaction (the transaction IS the invoke_agent span) - assert transaction["transaction"] == "invoke_agent test_agent" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + assert result is not None + assert result.output is not None - # The transaction itself should have invoke_agent data - assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + # Verify transaction (the transaction IS the invoke_agent span) + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" - # Check chat span - chat_span = 
chat_spans[0] - assert "chat" in chat_span["name"] - assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" - assert chat_span["attributes"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["attributes"] - assert "gen_ai.usage.input_tokens" in chat_span["attributes"] - assert "gen_ai.usage.output_tokens" in chat_span["attributes"] + # The transaction itself should have invoke_agent data + assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + + spans = [item.payload for item in items if item.type == "span"] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + # Check chat span + chat_span = chat_spans[0] + assert "chat" in chat_span["name"] + assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + assert "gen_ai.usage.output_tokens" in chat_span["attributes"] + else: + events = capture_events() + + result = await test_agent.run("Test input") + assert result is not None + assert result.output is not None + (transaction,) = events + spans = transaction["spans"] + + # Verify transaction (the transaction IS the invoke_agent span) + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # The transaction itself should have invoke_agent data + assert transaction["contexts"]["trace"]["op"] == "gen_ai.invoke_agent" + + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # Check chat span + chat_span = chat_spans[0] + assert "chat" in 
chat_span["description"] + assert chat_span["data"]["gen_ai.operation.name"] == "chat" + assert chat_span["data"]["gen_ai.response.streaming"] is False + assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.usage.input_tokens" in chat_span["data"] + assert "gen_ai.usage.output_tokens" in chat_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_async_model_error(sentry_init, capture_items): +async def test_agent_run_async_model_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") - def failing_model(messages, info): raise RuntimeError("model exploded") @@ -114,20 +161,43 @@ def failing_model(messages, info): name="test_agent", ) - with pytest.raises(RuntimeError, match="model exploded"): - await agent.run("Test input") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") + + with pytest.raises(RuntimeError, match="model exploded"): + await agent.run("Test input") - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 - assert spans[0]["status"] == "error" + assert spans[0]["status"] == "error" + else: + events = capture_events() + + with pytest.raises(RuntimeError, match="model exploded"): + await agent.run("Test input") + + (error, transaction) = events + assert error["level"] == "error" + spans = transaction["spans"] + assert len(spans) == 1 + assert spans[0]["status"] == "internal_error" + 
+ +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_agent): +async def test_agent_run_async_usage_data( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the invoke_agent span includes token usage and model data. """ @@ -135,17 +205,30 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - result = await test_agent.run("Test input") - assert result is not None - assert result.output is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + result = await test_agent.run("Test input") + + assert result is not None + assert result.output is not None + + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + result = await test_agent.run("Test input") + + assert result is not None + assert result.output is not None + + (transaction,) = events - (transaction,) = (item.payload for item in items if item.type == "transaction") # Verify transaction (the transaction IS the invoke_agent span) assert transaction["transaction"] == "invoke_agent test_agent" @@ -171,7 +254,14 @@ async def test_agent_run_async_usage_data(sentry_init, capture_items, get_test_a assert trace_data["gen_ai.response.model"] == "test" # Test model name -def test_agent_run_sync(sentry_init, capture_items, get_test_agent): +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_agent_run_sync( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the integration creates spans for sync agent runs. 
""" @@ -179,42 +269,76 @@ def test_agent_run_sync(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - result = test_agent.run_sync("Test input") - assert result is not None - assert result.output is not None + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] + result = test_agent.run_sync("Test input") - # Verify transaction - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "invoke_agent test_agent" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + assert result is not None + assert result.output is not None - # Find span types - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + spans = [item.payload for item in items if item.type == "span"] - # Verify streaming flag is False for sync - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is False + # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find span types + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is False for sync + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + else: + events = capture_events() + + result = test_agent.run_sync("Test input") + + assert result is not None + assert result.output is not 
None + + (transaction,) = events + spans = transaction["spans"] -def test_agent_run_sync_model_error(sentry_init, capture_items): + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find span types + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is False for sync + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is False + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_agent_run_sync_model_error( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("event", "transaction", "span") - def failing_model(messages, info): raise RuntimeError("model exploded") @@ -223,20 +347,43 @@ def failing_model(messages, info): name="test_agent", ) - with pytest.raises(RuntimeError, match="model exploded"): - agent.run_sync("Test input") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + with pytest.raises(RuntimeError, match="model exploded"): + agent.run_sync("Test input") - spans = [item.payload for item in items if item.type == "span"] - assert len(spans) == 1 + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + + assert spans[0]["status"] == "error" + else: + events = capture_events() + + with pytest.raises(RuntimeError, match="model exploded"): + agent.run_sync("Test input") - assert spans[0]["status"] == "error" + (error, transaction) = events + assert 
error["level"] == "error" + + spans = transaction["spans"] + assert len(spans) == 1 + + assert spans[0]["status"] == "internal_error" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): +async def test_agent_run_stream( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that the integration creates spans for streaming agent runs. """ @@ -244,43 +391,86 @@ async def test_agent_run_stream(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - async with test_agent.run_stream("Test input") as result: - # Consume the stream - async for _ in result.stream_output(): - pass - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Verify transaction - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "invoke_agent test_agent" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + async with test_agent.run_stream("Test input") as result: + # Consume the stream + async for _ in result.stream_output(): + pass - # Find chat spans - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + spans = [item.payload for item in items if item.type == "span"] - # Verify streaming flag is True for streaming - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["attributes"] - assert "gen_ai.usage.input_tokens" in chat_span["attributes"] - # Streaming responses should 
still have output data - assert ( - "gen_ai.response.text" in chat_span["attributes"] - or "gen_ai.response.model" in chat_span["attributes"] - ) + # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find chat spans + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is True for streaming + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["attributes"] + assert "gen_ai.usage.input_tokens" in chat_span["attributes"] + # Streaming responses should still have output data + assert ( + "gen_ai.response.text" in chat_span["attributes"] + or "gen_ai.response.model" in chat_span["attributes"] + ) + else: + events = capture_events() + + async with test_agent.run_stream("Test input") as result: + # Consume the stream + async for _ in result.stream_output(): + pass + (transaction,) = events + spans = transaction["spans"] + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" + + # Find chat spans + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # Verify streaming flag is True for streaming + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is True + assert "gen_ai.request.messages" in chat_span["data"] + assert "gen_ai.usage.input_tokens" in chat_span["data"] + # Streaming responses should still have output data + assert ( + "gen_ai.response.text" in chat_span["data"] + or "gen_ai.response.model" in chat_span["data"] + ) + + 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agent): +async def test_agent_run_stream_events( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that run_stream_events creates spans (it uses run internally, so non-streaming). """ @@ -288,33 +478,66 @@ async def test_agent_run_stream_events(sentry_init, capture_items, get_test_agen integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - # Consume all events test_agent = get_test_agent() - async for _ in test_agent.run_stream_events("Test input"): - pass - # Verify transaction - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "invoke_agent test_agent" + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find chat spans - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + async for _ in test_agent.run_stream_events("Test input"): + pass - # run_stream_events uses run() internally, so streaming should be False - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is False + # Verify transaction + (transaction,) = (item.payload for item in items if item.type == "transaction") + + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + # Find chat spans + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + # run_stream_events uses run() internally, so streaming should be 
False + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is False + else: + events = capture_events() + + async for _ in test_agent.run_stream_events("Test input"): + pass + + (transaction,) = events + + # Verify transaction + assert transaction["transaction"] == "invoke_agent test_agent" + + # Find chat spans + spans = transaction["spans"] + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + # run_stream_events uses run() internally, so streaming should be False + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is False + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): +async def test_agent_with_tools( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool execution creates execute_tool spans. 
""" @@ -322,6 +545,7 @@ async def test_agent_with_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -331,50 +555,90 @@ def add_numbers(a: int, b: int) -> int: """Add two numbers together.""" return a + b - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - result = await test_agent.run("What is 5 + 3?") + result = await test_agent.run("What is 5 + 3?") - assert result is not None + assert result is not None - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # Should have tool spans + assert len(tool_spans) >= 1 + + # Check tool span + tool_span = tool_spans[0] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"][ + 
"gen_ai.request.available_tools" + ] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + else: + events = capture_events() + + result = await test_agent.run("What is 5 + 3?") + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] - # Should have tool spans - assert len(tool_spans) >= 1 + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] - # Check tool span - tool_span = tool_spans[0] - assert "execute_tool" in tool_span["name"] - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in tool_span["attributes"] - assert "gen_ai.tool.output" in tool_span["attributes"] + # Should have tool spans + assert len(tool_spans) >= 1 - # Check chat spans have available_tools - for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["attributes"] - available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - # Available tools is serialized as a string - assert "add_numbers" in available_tools_str + # Check tool span + tool_span = tool_spans[0] + assert "execute_tool" in tool_span["description"] + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["data"] + assert "gen_ai.tool.output" in tool_span["data"] + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["data"] + available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, 
False]) @pytest.mark.parametrize( "handled_tool_call_exceptions", [False, True], ) @pytest.mark.asyncio async def test_agent_with_tool_model_retry( - sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions + sentry_init, + capture_events, + capture_items, + get_test_agent, + handled_tool_call_exceptions, + stream_gen_ai_spans, ): """ Test that a handled exception is captured when a tool raises ModelRetry. @@ -387,6 +651,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) retries = 0 @@ -402,62 +667,117 @@ def add_numbers(a: int, b: int) -> float: raise ModelRetry(message="Try again with the same arguments.") return a + b - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - result = await test_agent.run("What is 5 + 3?") + result = await test_agent.run("What is 5 + 3?") - assert result is not None + assert result is not None - if handled_tool_call_exceptions: - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" - assert error["exception"]["values"][0]["mechanism"]["handled"] + if handled_tool_call_exceptions: + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + assert error["exception"]["values"][0]["mechanism"]["handled"] - spans = [item.payload for item in items if item.type == "span"] - # Find child span types (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + spans = [item.payload for item in items if item.type == "span"] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if 
s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] - # Should have tool spans - assert len(tool_spans) >= 1 + # Should have tool spans + assert len(tool_spans) >= 1 - # Check tool spans - model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["name"] - assert ( - model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - ) - assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] + == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] + + tool_span = tool_spans[1] + assert "execute_tool" in tool_span["name"] + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"][ + "gen_ai.request.available_tools" + ] + + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + else: + events = capture_events() + + result = await test_agent.run("What is 5 + 3?") + + assert result is not None - tool_span = tool_spans[1] - assert "execute_tool" in tool_span["name"] - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - assert tool_span["attributes"]["gen_ai.tool.name"] == 
"add_numbers" - assert "gen_ai.tool.input" in tool_span["attributes"] - assert "gen_ai.tool.output" in tool_span["attributes"] + if handled_tool_call_exceptions: + (error, transaction) = events + else: + (transaction,) = events + spans = transaction["spans"] - # Check chat spans have available_tools - for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["attributes"] - available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - # Available tools is serialized as a string - assert "add_numbers" in available_tools_str + if handled_tool_call_exceptions: + assert error["level"] == "error" + assert error["exception"]["values"][0]["mechanism"]["handled"] + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + # Should have tool spans + assert len(tool_spans) >= 1 + + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["description"] + assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["data"] + + tool_span = tool_spans[1] + assert "execute_tool" in tool_span["description"] + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in tool_span["data"] + assert "gen_ai.tool.output" in tool_span["data"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["data"] + available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + + +@pytest.mark.parametrize("stream_gen_ai_spans", 
[True, False]) @pytest.mark.parametrize( "handled_tool_call_exceptions", [False, True], ) @pytest.mark.asyncio async def test_agent_with_tool_validation_error( - sentry_init, capture_items, get_test_agent, handled_tool_call_exceptions + sentry_init, + capture_events, + capture_items, + get_test_agent, + handled_tool_call_exceptions, + stream_gen_ai_spans, ): """ Test that a handled exception is captured when a tool has unsatisfiable constraints. @@ -470,6 +790,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -479,54 +800,109 @@ def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int: """Add two numbers together.""" return a + b - items = capture_items("event", "transaction", "span") + if stream_gen_ai_spans: + items = capture_items("event", "transaction", "span") - result = None - with pytest.raises(UnexpectedModelBehavior): - result = await test_agent.run("What is 5 + 3?") + result = None + with pytest.raises(UnexpectedModelBehavior): + result = await test_agent.run("What is 5 + 3?") - assert result is None + assert result is None - if handled_tool_call_exceptions: - ( - error, - model_behaviour_error, - ) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" - assert error["exception"]["values"][0]["mechanism"]["handled"] + if handled_tool_call_exceptions: + ( + error, + model_behaviour_error, + ) = (item.payload for item in items if item.type == "event") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] - - # Should have tool spans - assert len(tool_spans) >= 1 + assert error["level"] == "error" + assert 
error["exception"]["values"][0]["mechanism"]["handled"] - # Check tool spans - model_retry_tool_span = tool_spans[0] - assert "execute_tool" in model_retry_tool_span["name"] - assert ( - model_retry_tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - ) - assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" - assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] - - # Check chat spans have available_tools - for chat_span in chat_spans: - assert "gen_ai.request.available_tools" in chat_span["attributes"] - available_tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - # Available tools is serialized as a string - assert "add_numbers" in available_tools_str + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + # Should have tool spans + assert len(tool_spans) >= 1 + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["name"] + assert ( + model_retry_tool_span["attributes"]["gen_ai.operation.name"] + == "execute_tool" + ) + assert model_retry_tool_span["attributes"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["attributes"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["attributes"] + available_tools_str = chat_span["attributes"][ + "gen_ai.request.available_tools" + ] + + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + else: + events = capture_events() + + result = None + with pytest.raises(UnexpectedModelBehavior): + result = await test_agent.run("What is 5 + 3?") + + assert result is None + + if handled_tool_call_exceptions: + (error, 
model_behaviour_error, transaction) = events + else: + ( + model_behaviour_error, + transaction, + ) = events + spans = transaction["spans"] + + if handled_tool_call_exceptions: + assert error["level"] == "error" + assert error["exception"]["values"][0]["mechanism"]["handled"] + + # Find child span types (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + + # Should have tool spans + assert len(tool_spans) >= 1 + + # Check tool spans + model_retry_tool_span = tool_spans[0] + assert "execute_tool" in model_retry_tool_span["description"] + assert model_retry_tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert model_retry_tool_span["data"]["gen_ai.tool.name"] == "add_numbers" + assert "gen_ai.tool.input" in model_retry_tool_span["data"] + + # Check chat spans have available_tools + for chat_span in chat_spans: + assert "gen_ai.request.available_tools" in chat_span["data"] + available_tools_str = chat_span["data"]["gen_ai.request.available_tools"] + # Available tools is serialized as a string + assert "add_numbers" in available_tools_str + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_agent): +async def test_agent_with_tools_streaming( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool execution works correctly with streaming. 
""" @@ -534,6 +910,7 @@ async def test_agent_with_tools_streaming(sentry_init, capture_items, get_test_a integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -543,68 +920,123 @@ def multiply(a: int, b: int) -> int: """Multiply two numbers.""" return a * b - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - async with test_agent.run_stream("What is 7 times 8?") as result: - async for _ in result.stream_output(): - pass + async with test_agent.run_stream("What is 7 times 8?") as result: + async for _ in result.stream_output(): + pass - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find span types - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find span types + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # Should have tool spans + assert len(tool_spans) >= 1 + + # Verify streaming flag is True + for chat_span in chat_spans: + assert chat_span["attributes"]["gen_ai.response.streaming"] is True - # Should have tool spans - assert len(tool_spans) >= 1 + # Check tool span + tool_span = tool_spans[0] + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in tool_span["attributes"] + assert "gen_ai.tool.output" in tool_span["attributes"] + else: + events = capture_events() + + async with test_agent.run_stream("What is 7 times 8?") as result: + async for _ in result.stream_output(): + pass + + (transaction,) = events + spans = 
transaction["spans"] + + # Find span types + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + + # Should have tool spans + assert len(tool_spans) >= 1 - # Verify streaming flag is True - for chat_span in chat_spans: - assert chat_span["attributes"]["gen_ai.response.streaming"] is True + # Verify streaming flag is True + for chat_span in chat_spans: + assert chat_span["data"]["gen_ai.response.streaming"] is True - # Check tool span - tool_span = tool_spans[0] - assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply" - assert "gen_ai.tool.input" in tool_span["attributes"] - assert "gen_ai.tool.output" in tool_span["attributes"] + # Check tool span + tool_span = tool_spans[0] + assert tool_span["data"]["gen_ai.tool.name"] == "multiply" + assert "gen_ai.tool.input" in tool_span["data"] + assert "gen_ai.tool.output" in tool_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_model_settings(sentry_init, capture_items, get_test_agent_with_settings): +async def test_model_settings( + sentry_init, + capture_events, + capture_items, + get_test_agent_with_settings, + stream_gen_ai_spans, +): """ Test that model settings are captured in spans. 
""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent_with_settings = get_test_agent_with_settings() - await test_agent_with_settings.run("Test input") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find chat span - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + await test_agent_with_settings.run("Test input") - chat_span = chat_spans[0] - # Check that model settings are captured - assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 - assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 - assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 + spans = [item.payload for item in items if item.type == "span"] + # Find chat span + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # Check that model settings are captured + assert chat_span["attributes"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["attributes"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["attributes"].get("gen_ai.request.top_p") == 0.9 + else: + events = capture_events() + + await test_agent_with_settings.run("Test input") + + (transaction,) = events + spans = transaction["spans"] + + # Find chat span + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # Check that model settings are captured + assert chat_span["data"].get("gen_ai.request.temperature") == 0.7 + assert chat_span["data"].get("gen_ai.request.max_tokens") == 100 + assert chat_span["data"].get("gen_ai.request.top_p") == 0.9 + 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -616,7 +1048,12 @@ async def test_model_settings(sentry_init, capture_items, get_test_agent_with_se ], ) async def test_system_prompt_attribute( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """ Test that system prompts are included as the first message. @@ -631,38 +1068,72 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await agent.run("Hello") + await agent.run("Hello") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + # The transaction IS the invoke_agent span, check for messages in chat spans instead + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - chat_span = chat_spans[0] + assert len(chat_spans) >= 1 - if send_default_pii and include_prompts: - system_instructions = chat_span["attributes"][ - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS - ] - assert json.loads(system_instructions) == [ - { - "type": "text", - "content": "You are a helpful assistant specialized in testing.", - } - ] + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] 
+ assert json.loads(system_instructions) == [ + { + "type": "text", + "content": "You are a helpful assistant specialized in testing.", + } + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] + events = capture_events() + + await agent.run("Hello") + + (transaction,) = events + spans = transaction["spans"] + # The transaction IS the invoke_agent span, check for messages in chat spans instead + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + assert json.loads(system_instructions) == [ + { + "type": "text", + "content": "You are a helpful assistant specialized in testing.", + } + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_error_handling(sentry_init, capture_items): +async def test_error_handling( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test error handling in agent execution. 
""" @@ -676,23 +1147,42 @@ async def test_error_handling(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Simple run that should succeed - await agent.run("Hello") + # Simple run that should succeed + await agent.run("Hello") + + # At minimum, we should have a transaction + transaction = next(item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + # Simple run that should succeed + await agent.run("Hello") + + # At minimum, we should have a transaction + assert len(events) >= 1 + transaction = [e for e in events if e.get("type") == "transaction"][0] - # At minimum, we should have a transaction - transaction = next(item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "invoke_agent test_error" # Transaction should complete successfully (status key may not exist if no error) trace_status = transaction["contexts"]["trace"].get("status") assert trace_status != "error" # Could be None or some other status +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_without_pii(sentry_init, capture_items, get_test_agent): +async def test_without_pii( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that PII is not captured when send_default_pii is False. 
""" @@ -700,28 +1190,53 @@ async def test_without_pii(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Sensitive input") + test_agent = get_test_agent() + await test_agent.run("Sensitive input") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + else: + events = capture_events() - # Verify that messages and response text are not captured - for span in chat_spans: - assert "gen_ai.request.messages" not in span["attributes"] - assert "gen_ai.response.text" not in span["attributes"] + test_agent = get_test_agent() + await test_agent.run("Sensitive input") + (transaction,) = events + spans = transaction["spans"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def 
test_without_pii_tools(sentry_init, capture_items, get_test_agent): +async def test_without_pii_tools( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool input/output are not captured when send_default_pii is False. """ @@ -729,6 +1244,7 @@ async def test_without_pii_tools(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -738,56 +1254,103 @@ def sensitive_tool(data: str) -> str: """A tool with sensitive data.""" return f"Processed: {data}" - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use sensitive tool with private data") + await test_agent.run("Use sensitive tool with private data") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find tool spans - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find tool spans + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Use sensitive tool with private data") - # If tool was executed, verify input/output are not captured - for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["attributes"] - assert "gen_ai.tool.output" not in tool_span["attributes"] + (transaction,) = events + spans = transaction["spans"] + # Find tool spans + tool_spans = [s for s in spans if s["op"] == 
"gen_ai.execute_tool"] + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["data"] + assert "gen_ai.tool.output" not in tool_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_multiple_agents_concurrent(sentry_init, capture_items, get_test_agent): +async def test_multiple_agents_concurrent( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that multiple agents can run concurrently without interfering. """ sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() async def run_agent(input_text): return await test_agent.run(input_text) - # Run 3 agents concurrently - results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert len(results) == 3 + # Run 3 agents concurrently + results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) - # Verify each transaction is separate - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) == 3 - for i, transaction in enumerate(events): - assert transaction["transaction"] == "invoke_agent test_agent" + assert len(results) == 3 + + # Verify each transaction is separate + events = [item.payload for item in items if item.type == "transaction"] + assert len(events) == 3 + for i, transaction in enumerate(events): + assert transaction["transaction"] == "invoke_agent test_agent" + else: + events = capture_events() + + # Run 3 agents concurrently + results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) + + assert len(results) == 3 + assert len(events) == 3 + # Verify each transaction is separate + 
for i, transaction in enumerate(events): + assert transaction["type"] == "transaction" + assert transaction["transaction"] == "invoke_agent test_agent" + # Each should have its own spans + assert len(transaction["spans"]) >= 1 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_message_history(sentry_init, capture_items): +async def test_message_history( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that full conversation history is captured in chat spans. """ @@ -800,10 +1363,9 @@ async def test_message_history(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - # First message await agent.run("Hello, I'm Alice") @@ -820,58 +1382,114 @@ async def test_message_history(sentry_init, capture_items): ), ] - await agent.run("What is my name?", message_history=history) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # We should have 2 transactions - events = [item.payload for item in items if item.type == "transaction"] - assert len(events) >= 2 + await agent.run("What is my name?", message_history=history) - # Check the second transaction has the full history - second_transaction = events[1] - spans = second_transaction["spans"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + # We should have 2 transactions + events = [item.payload for item in items if item.type == "transaction"] - if chat_spans: - chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["attributes"]: - messages_data = chat_span["attributes"]["gen_ai.request.messages"] - # Should have multiple messages including history - assert len(messages_data) > 1 + # Check the second transaction has the full history + second_transaction = events[1] + spans = 
second_transaction["spans"] + + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] + # Should have multiple messages including history + assert len(messages_data) > 1 + else: + events = capture_events() + + await agent.run("What is my name?", message_history=history) + + # We should have 2 transactions + assert len(events) >= 2 + # Check the second transaction has the full history + second_transaction = events[1] + spans = second_transaction["spans"] + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["data"]: + messages_data = chat_span["data"]["gen_ai.request.messages"] + # Should have multiple messages including history + assert len(messages_data) > 1 + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_gen_ai_system(sentry_init, capture_items, get_test_agent): +async def test_gen_ai_system( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that gen_ai.system is set from the model. 
""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Test input") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find chat span - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + await test_agent.run("Test input") - chat_span = chat_spans[0] - # gen_ai.system should be set from the model (TestModel -> 'test') - assert "gen_ai.system" in chat_span["attributes"] - assert chat_span["attributes"]["gen_ai.system"] == "test" + spans = [item.payload for item in items if item.type == "span"] + + # Find chat span + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # gen_ai.system should be set from the model (TestModel -> 'test') + assert "gen_ai.system" in chat_span["attributes"] + assert chat_span["attributes"]["gen_ai.system"] == "test" + else: + events = capture_events() + await test_agent.run("Test input") + (transaction,) = events + spans = transaction["spans"] + + # Find chat span + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + # gen_ai.system should be set from the model (TestModel -> 'test') + assert "gen_ai.system" in chat_span["data"] + assert chat_span["data"]["gen_ai.system"] == "test" + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_include_prompts_false(sentry_init, capture_items, get_test_agent): +async def test_include_prompts_false( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that prompts are not 
captured when include_prompts=False. """ @@ -879,28 +1497,53 @@ async def test_include_prompts_false(sentry_init, capture_items, get_test_agent) integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Sensitive prompt") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + await test_agent.run("Sensitive prompt") + + spans = [item.payload for item in items if item.type == "span"] - # Verify that messages and response text are not captured - for span in chat_spans: - assert "gen_ai.request.messages" not in span["attributes"] - assert "gen_ai.response.text" not in span["attributes"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + else: + events = capture_events() + + await test_agent.run("Sensitive prompt") + + (transaction,) = events + spans = transaction["spans"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + # Verify that messages and response text are not captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + 
+@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): +async def test_include_prompts_true( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that prompts are captured when include_prompts=True (default). """ @@ -908,29 +1551,52 @@ async def test_include_prompts_true(sentry_init, capture_items, get_test_agent): integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Test prompt") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + await test_agent.run("Test prompt") - # Verify that messages are captured in chat spans - assert len(chat_spans) >= 1 - for chat_span in chat_spans: - assert "gen_ai.request.messages" in chat_span["attributes"] + spans = [item.payload for item in items if item.type == "span"] + + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + # Verify that messages are captured in chat spans + assert len(chat_spans) >= 1 + for chat_span in chat_spans: + assert "gen_ai.request.messages" in chat_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Test prompt") + (transaction,) = events + spans = transaction["spans"] + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # Verify that messages 
are captured in chat spans + assert len(chat_spans) >= 1 + for chat_span in chat_spans: + assert "gen_ai.request.messages" in chat_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_include_prompts_false_with_tools( - sentry_init, capture_items, get_test_agent + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, ): """ Test that tool input/output are not captured when include_prompts=False. @@ -939,6 +1605,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -948,27 +1615,50 @@ def test_tool(value: int) -> int: """A test tool.""" return value * 2 - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use the test tool with value 5") + await test_agent.run("Use the test tool with value 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find tool spans - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] + # Find tool spans + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] + + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Use the test tool with value 5") - # If tool was executed, verify input/output are not captured - for tool_span in tool_spans: - assert "gen_ai.tool.input" not in tool_span["attributes"] - assert "gen_ai.tool.output" not in 
tool_span["attributes"] + (transaction,) = events + spans = transaction["spans"] + # Find tool spans + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + # If tool was executed, verify input/output are not captured + for tool_span in tool_spans: + assert "gen_ai.tool.input" not in tool_span["data"] + assert "gen_ai.tool.output" not in tool_span["data"] + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test_agent): +async def test_include_prompts_requires_pii( + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that include_prompts requires send_default_pii=True. """ @@ -976,28 +1666,52 @@ async def test_include_prompts_requires_pii(sentry_init, capture_items, get_test integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - test_agent = get_test_agent() - await test_agent.run("Test prompt") - spans = [item.payload for item in items if item.type == "span"] + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Find child spans (invoke_agent is the transaction, not a child span) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + await test_agent.run("Test prompt") + + spans = [item.payload for item in items if item.type == "span"] + + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - # Even with include_prompts=True, if PII is disabled, messages should not be captured - for span in chat_spans: - assert "gen_ai.request.messages" not in span["attributes"] - assert "gen_ai.response.text" not in span["attributes"] + # Even 
with include_prompts=True, if PII is disabled, messages should not be captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + else: + events = capture_events() + + await test_agent.run("Test prompt") + + (transaction,) = events + spans = transaction["spans"] + + # Find child spans (invoke_agent is the transaction, not a child span) + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + # Even with include_prompts=True, if PII is disabled, messages should not be captured + for span in chat_spans: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_mcp_tool_execution_spans(sentry_init, capture_items): +async def test_mcp_tool_execution_spans( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
@@ -1067,54 +1781,113 @@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + # Simulate MCP tool execution within a transaction through CombinedToolset + with sentry_sdk.start_transaction( + op="ai.run", name="invoke_agent test_mcp_agent" + ): + # Set up the agent context + scope = sentry_sdk.get_current_scope() + scope._contexts["pydantic_ai_agent"] = { + "_agent": agent, + } - # Simulate MCP tool execution within a transaction through CombinedToolset - with sentry_sdk.start_transaction(op="ai.run", name="invoke_agent test_mcp_agent"): - # Set up the agent context - scope = sentry_sdk.get_current_scope() - scope._contexts["pydantic_ai_agent"] = { - "_agent": agent, - } + # Create a mock tool that simulates an MCP tool from CombinedToolset + from pydantic_ai._run_context import RunContext + from pydantic_ai.result import RunUsage + from pydantic_ai.models.test import TestModel + from pydantic_ai.toolsets.combined import _CombinedToolsetTool + + ctx = RunContext( + deps=None, + model=TestModel(), + usage=RunUsage(), + retry=0, + tool_name="test_mcp_tool", + ) - # Create a mock tool that simulates an MCP tool from CombinedToolset - from pydantic_ai._run_context import RunContext - from pydantic_ai.result import RunUsage - from pydantic_ai.models.test import TestModel - from pydantic_ai.toolsets.combined import _CombinedToolsetTool - - ctx = RunContext( - deps=None, - model=TestModel(), - usage=RunUsage(), - retry=0, - tool_name="test_mcp_tool", - ) + tool_name = "test_mcp_tool" + + # Create a tool that points to the MCP server + # This simulates how CombinedToolset wraps tools from different sources + tool = _CombinedToolsetTool( + toolset=combined, + tool_def=MagicMock(name=tool_name), + max_retries=0, + 
args_validator=MagicMock(), + source_toolset=mock_server, + source_tool=MagicMock(), + ) - tool_name = "test_mcp_tool" - - # Create a tool that points to the MCP server - # This simulates how CombinedToolset wraps tools from different sources - tool = _CombinedToolsetTool( - toolset=combined, - tool_def=MagicMock(name=tool_name), - max_retries=0, - args_validator=MagicMock(), - source_toolset=mock_server, - source_tool=MagicMock(), - ) + try: + await combined.call_tool(tool_name, {"query": "test"}, ctx, tool) + except Exception: + # MCP tool might raise if not fully mocked, that's okay + pass - try: - await combined.call_tool(tool_name, {"query": "test"}, ctx, tool) - except Exception: - # MCP tool might raise if not fully mocked, that's okay - pass + events_list = items + if len(events_list) == 0: + pytest.skip("No events captured, MCP test setup incomplete") + + (transaction,) = events_list + transaction["spans"] + else: + events = capture_events() + + # Simulate MCP tool execution within a transaction through CombinedToolset + with sentry_sdk.start_transaction( + op="ai.run", name="invoke_agent test_mcp_agent" + ) as transaction: + # Set up the agent context + scope = sentry_sdk.get_current_scope() + scope._contexts["pydantic_ai_agent"] = { + "_agent": agent, + } + + # Create a mock tool that simulates an MCP tool from CombinedToolset + from pydantic_ai._run_context import RunContext + from pydantic_ai.result import RunUsage + from pydantic_ai.models.test import TestModel + from pydantic_ai.toolsets.combined import _CombinedToolsetTool + + ctx = RunContext( + deps=None, + model=TestModel(), + usage=RunUsage(), + retry=0, + tool_name="test_mcp_tool", + ) + + tool_name = "test_mcp_tool" + + # Create a tool that points to the MCP server + # This simulates how CombinedToolset wraps tools from different sources + tool = _CombinedToolsetTool( + toolset=combined, + tool_def=MagicMock(name=tool_name), + max_retries=0, + args_validator=MagicMock(), + 
source_toolset=mock_server, + source_tool=MagicMock(), + ) + + try: + await combined.call_tool(tool_name, {"query": "test"}, ctx, tool) + except Exception: + # MCP tool might raise if not fully mocked, that's okay + pass - events_list = items - if len(events_list) == 0: - pytest.skip("No events captured, MCP test setup incomplete") + events_list = events + if len(events_list) == 0: + pytest.skip("No events captured, MCP test setup incomplete") + + (transaction,) = events_list + transaction["spans"] # Note: This test manually calls combined.call_tool which doesn't go through # ToolManager._call_tool (which is what the integration patches). @@ -1284,8 +2057,14 @@ async def run_and_check_context(agent, agent_name): # ==================== Additional Coverage Tests ==================== +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): +async def test_invoke_agent_with_list_user_prompt( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that invoke_agent span handles list user prompts correctly. 
""" @@ -1298,16 +2077,26 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Use a list as user prompt - await agent.run(["First part", "Second part"]) + # Use a list as user prompt + await agent.run(["First part", "Second part"]) + + (transaction,) = [item.payload for item in items if item.type == "transaction"] + else: + events = capture_events() + + # Use a list as user prompt + await agent.run(["First part", "Second part"]) + + (transaction,) = events # Check that the invoke_agent transaction has messages data # The invoke_agent is the transaction itself - (transaction,) = [item.payload for item in items if item.type == "transaction"] if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]: messages_str = transaction["contexts"]["trace"]["data"][ "gen_ai.request.messages" @@ -1316,6 +2105,7 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): assert "Second part" in messages_str +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -1327,7 +2117,12 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_items): ], ) async def test_invoke_agent_with_instructions( - sentry_init, capture_items, send_default_pii, include_prompts + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, ): """ Test that invoke_agent span handles instructions correctly. 
@@ -1348,32 +2143,59 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await agent.run("Test input") + await agent.run("Test input") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # The transaction IS the invoke_agent span, check for messages in chat spans instead - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + # The transaction IS the invoke_agent span, check for messages in chat spans instead + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - chat_span = chat_spans[0] + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["attributes"][ + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS + ] + assert json.loads(system_instructions) == [ + {"type": "text", "content": "System prompt"}, + {"type": "text", "content": "Instruction 1\nInstruction 2"}, + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] - if send_default_pii and include_prompts: - system_instructions = chat_span["attributes"][ - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS - ] - assert json.loads(system_instructions) == [ - {"type": "text", "content": "System prompt"}, - {"type": "text", "content": "Instruction 1\nInstruction 2"}, - ] else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["attributes"] + events = capture_events() + + await agent.run("Test input") + + (transaction,) = events + spans = transaction["spans"] + + # The transaction IS the invoke_agent span, check 
for messages in chat spans instead + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + + if send_default_pii and include_prompts: + system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + assert json.loads(system_instructions) == [ + {"type": "text", "content": "System prompt"}, + {"type": "text", "content": "Instruction 1\nInstruction 2"}, + ] + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"] @pytest.mark.asyncio @@ -1463,8 +2285,14 @@ async def test_model_settings_object_style(sentry_init, capture_items): assert transaction is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_usage_data_partial(sentry_init, capture_items): +async def test_usage_data_partial( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that usage data is correctly handled when only some fields are present. 
""" @@ -1476,17 +2304,29 @@ async def test_usage_data_partial(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + await agent.run("Test input") - await agent.run("Test input") + spans = [item.payload for item in items if item.type == "span"] - spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + else: + events = capture_events() + + await agent.run("Test input") + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] assert len(chat_spans) >= 1 # Check that usage data fields exist (they may or may not be set depending on TestModel) @@ -1495,8 +2335,14 @@ async def test_usage_data_partial(sentry_init, capture_items): assert chat_span is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_data_from_scope(sentry_init, capture_items): +async def test_agent_data_from_scope( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that agent data can be retrieved from Sentry scope when not passed directly. 
""" @@ -1509,21 +2355,38 @@ sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + # The integration automatically sets agent in scope during execution + await agent.run("Test input") + + # Verify agent name is captured + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + # The integration automatically sets agent in scope during execution + await agent.run("Test input") - # The integration automatically sets agent in scope during execution - await agent.run("Test input") + # Verify agent name is captured + (transaction,) = events - # Verify agent name is capture - (transaction,) = (item.payload for item in items if item.type == "transaction") + # Verify agent name is captured assert transaction["transaction"] == "invoke_agent test_scope_agent" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_available_tools_without_description( - sentry_init, capture_items, get_test_agent + sentry_init, + capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, ): """ Test that available tools are captured even when description is missing. 
@@ -1531,6 +2394,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -1540,24 +2404,46 @@ def tool_without_desc(x: int) -> int: # No docstring = no description return x * 2 - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use the tool with 5") + await test_agent.run("Use the tool with 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - if chat_spans: - chat_span = chat_spans[0] - if "gen_ai.request.available_tools" in chat_span["attributes"]: - tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] - assert "tool_without_desc" in tools_str + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.available_tools" in chat_span["attributes"]: + tools_str = chat_span["attributes"]["gen_ai.request.available_tools"] + assert "tool_without_desc" in tools_str + else: + events = capture_events() + + await test_agent.run("Use the tool with 5") + (transaction,) = events + spans = transaction["spans"] + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + if chat_spans: + chat_span = chat_spans[0] + if "gen_ai.request.available_tools" in chat_span["data"]: + tools_str = chat_span["data"]["gen_ai.request.available_tools"] + assert "tool_without_desc" in tools_str + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent): +async def test_output_with_tool_calls( + sentry_init, + 
capture_events, + capture_items, + get_test_agent, + stream_gen_ai_spans, +): """ Test that tool calls in model response are captured correctly. """ @@ -1565,6 +2451,7 @@ async def test_output_with_tool_calls(sentry_init, capture_items, get_test_agent integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) test_agent = get_test_agent() @@ -1574,28 +2461,53 @@ def calc_tool(value: int) -> int: """Calculate something.""" return value + 10 - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - await test_agent.run("Use calc_tool with 5") + await test_agent.run("Use calc_tool with 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - # At least one chat span should exist - assert len(chat_spans) >= 1 + # At least one chat span should exist + assert len(chat_spans) >= 1 + + # Check if tool calls are captured in response + for chat_span in chat_spans: + # Tool calls may or may not be in response depending on TestModel behavior + # Just verify the span was created and has basic data + assert "gen_ai.operation.name" in chat_span["attributes"] + else: + events = capture_events() + + await test_agent.run("Use calc_tool with 5") + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # At least one chat span should exist + assert len(chat_spans) >= 1 - # Check if tool calls are captured in response - for chat_span in chat_spans: - # Tool calls may or may not be in response depending on TestModel behavior - # Just verify the span was created and has basic data - assert 
"gen_ai.operation.name" in chat_span["attributes"] + # Check if tool calls are captured in response + for chat_span in chat_spans: + # Tool calls may or may not be in response depending on TestModel behavior + # Just verify the span was created and has basic data + assert "gen_ai.operation.name" in chat_span["data"] +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_message_formatting_with_different_parts(sentry_init, capture_items): +async def test_message_formatting_with_different_parts( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that different message part types are handled correctly in ai_client span. """ @@ -1610,10 +2522,9 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") - # Create message history with different part types history = [ messages.ModelRequest(parts=[messages.UserPromptPart(content="Hello")]), @@ -1625,23 +2536,44 @@ async def test_message_formatting_with_different_parts(sentry_init, capture_item ), ] - await agent.run("What did I say?", message_history=history) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] + await agent.run("What did I say?", message_history=history) - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + spans = [item.payload for item in items if item.type == "span"] - # Should have chat spans - assert len(chat_spans) >= 1 + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] - # Check that messages are captured - chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["attributes"]: - messages_data = 
chat_span["attributes"]["gen_ai.request.messages"] - # Should contain message history - assert messages_data is not None + # Should have chat spans + assert len(chat_spans) >= 1 + + # Check that messages are captured + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_data = chat_span["attributes"]["gen_ai.request.messages"] + assert messages_data is not None + else: + events = capture_events() + + await agent.run("What did I say?", message_history=history) + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] + + # Should have chat spans + assert len(chat_spans) >= 1 + + # Check that messages are captured + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["data"]: + messages_data = chat_span["data"]["gen_ai.request.messages"] + # Should contain message history + assert messages_data is not None @pytest.mark.asyncio @@ -1699,8 +2631,14 @@ async def test_update_ai_client_span_with_none_response(sentry_init, capture_ite assert transaction is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_agent_without_name(sentry_init, capture_items): +async def test_agent_without_name( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that agent without a name is handled correctly. 
""" @@ -1710,14 +2648,26 @@ async def test_agent_without_name(sentry_init, capture_items): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + await agent.run("Test input") + + # Should still create transaction, just with default name + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + await agent.run("Test input") + + (transaction,) = events - await agent.run("Test input") + # Should still create transaction, just with default name + assert transaction["type"] == "transaction" - # Should still create transaction, just with default name - (transaction,) = (item.payload for item in items if item.type == "transaction") # Transaction name should be "invoke_agent agent" or similar default assert "invoke_agent" in transaction["transaction"] @@ -1869,8 +2819,14 @@ async def test_set_usage_data_with_partial_fields(sentry_init, capture_items): assert transaction is not None +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_message_parts_with_tool_return(sentry_init, capture_items): +async def test_message_parts_with_tool_return( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that ToolReturnPart messages are handled correctly. 
""" @@ -1890,18 +2846,30 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - # Run with history containing tool return - await agent.run("Use test_tool with 5") + # Run with history containing tool return + await agent.run("Use test_tool with 5") - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + else: + events = capture_events() + + # Run with history containing tool return + await agent.run("Use test_tool with 5") + + (transaction,) = events + spans = transaction["spans"] + + chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] # Should have chat spans assert len(chat_spans) >= 1 @@ -2791,63 +3759,116 @@ def _find_binary_content(messages_data, expected_modality, expected_mime_type): return False +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_binary_content_encoding_image(sentry_init, capture_items): +async def test_binary_content_encoding_image( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that BinaryContent with image data is properly encoded in messages.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): - span = sentry_sdk.start_span(op="test_span") - 
binary_content = BinaryContent( - data=b"fake_image_data_12345", media_type="image/png" - ) - user_part = UserPromptPart(content=["Look at this image:", binary_content]) - mock_msg = MagicMock() - mock_msg.parts = [user_part] - mock_msg.instructions = None + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_data_12345", media_type="image/png" + ) + user_part = UserPromptPart(content=["Look at this image:", binary_content]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None - _set_input_messages(span, [mock_msg]) - span.finish() + _set_input_messages(span, [mock_msg]) + span.finish() - (event,) = (item.payload for item in items if item.type == "transaction") + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_data_12345", media_type="image/png" + ) + user_part = UserPromptPart(content=["Look at this image:", binary_content]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + _set_input_messages(span, [mock_msg]) + span.finish() + + (event,) = events span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) assert _find_binary_content(messages_data, "image", "image/png") +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_binary_content_encoding_mixed_content(sentry_init, capture_items): +async def test_binary_content_encoding_mixed_content( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that BinaryContent mixed with text content is properly handled.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, 
send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): - span = sentry_sdk.start_span(op="test_span") - binary_content = BinaryContent( - data=b"fake_image_bytes", media_type="image/jpeg" - ) - user_part = UserPromptPart( - content=["Here is an image:", binary_content, "What do you see?"] - ) - mock_msg = MagicMock() - mock_msg.parts = [user_part] - mock_msg.instructions = None + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_bytes", media_type="image/jpeg" + ) + user_part = UserPromptPart( + content=["Here is an image:", binary_content, "What do you see?"] + ) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None - _set_input_messages(span, [mock_msg]) - span.finish() + _set_input_messages(span, [mock_msg]) + span.finish() + + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + binary_content = BinaryContent( + data=b"fake_image_bytes", media_type="image/jpeg" + ) + user_part = UserPromptPart( + content=["Here is an image:", binary_content, "What do you see?"] + ) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + _set_input_messages(span, [mock_msg]) + span.finish() + + (event,) = events - (event,) = (item.payload for item in items if item.type == "transaction") span_data = event["spans"][0]["data"] messages_data = _get_messages_from_span(span_data) @@ -2862,8 +3883,14 @@ async def test_binary_content_encoding_mixed_content(sentry_init, capture_items) assert _find_binary_content(messages_data, "image", 
"image/jpeg") +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_binary_content_in_agent_run(sentry_init, capture_items): +async def test_binary_content_in_agent_run( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that BinaryContent in actual agent run is properly captured in spans.""" agent = Agent("test", name="test_binary_agent") @@ -2871,53 +3898,102 @@ async def test_binary_content_in_agent_run(sentry_init, capture_items): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") binary_content = BinaryContent( data=b"fake_image_data_for_testing", media_type="image/png" ) - await agent.run(["Analyze this image:", binary_content]) - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - chat_span = chat_spans[0] - if "gen_ai.request.messages" in chat_span["attributes"]: - messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) - assert any(keyword in messages_str for keyword in ["blob", "image", "base64"]) + await agent.run(["Analyze this image:", binary_content]) + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["attributes"]: + messages_str = str(chat_span["attributes"]["gen_ai.request.messages"]) + + assert any( + keyword in messages_str for keyword in ["blob", "image", "base64"] + ) + else: + events = capture_events() + + await agent.run(["Analyze this image:", binary_content]) + + (transaction,) 
= events + chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + + assert len(chat_spans) >= 1 + + chat_span = chat_spans[0] + if "gen_ai.request.messages" in chat_span["data"]: + messages_str = str(chat_span["data"]["gen_ai.request.messages"]) + assert any( + keyword in messages_str for keyword in ["blob", "image", "base64"] + ) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): +async def test_set_usage_data_with_cache_tokens( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """Test that cache_read_tokens and cache_write_tokens are tracked.""" sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - with sentry_sdk.start_transaction(op="test", name="test"): - span = sentry_sdk.start_span(op="test_span") - usage = RequestUsage( - input_tokens=100, - output_tokens=50, - cache_read_tokens=80, - cache_write_tokens=20, - ) - _set_usage_data(span, usage) - span.finish() + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + usage = RequestUsage( + input_tokens=100, + output_tokens=50, + cache_read_tokens=80, + cache_write_tokens=20, + ) + _set_usage_data(span, usage) + span.finish() + + (event,) = (item.payload for item in items if item.type == "transaction") + else: + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + span = sentry_sdk.start_span(op="test_span") + usage = RequestUsage( + input_tokens=100, + output_tokens=50, + cache_read_tokens=80, + cache_write_tokens=20, + ) + _set_usage_data(span, usage) + span.finish() + + (event,) = events - (event,) = (item.payload for item in items if item.type == 
"transaction") (span_data,) = event["spans"] assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "url,image_url_kwargs,expected_content", [ @@ -2960,7 +4036,13 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_items): ], ) def test_image_url_base64_content_in_span( - sentry_init, capture_items, url, image_url_kwargs, expected_content + sentry_init, + capture_events, + capture_items, + url, + image_url_kwargs, + expected_content, + stream_gen_ai_spans, ): from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span @@ -2968,39 +4050,67 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + found_image = False + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(op="test", name="test"): + image_url = ImageUrl(url=url, **image_url_kwargs) + user_part = UserPromptPart(content=["Look at this image:", image_url]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + span = ai_client_span([mock_msg], None, None, None) + span.finish() + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) - with sentry_sdk.start_transaction(op="test", name="test"): - image_url = ImageUrl(url=url, **image_url_kwargs) - user_part = UserPromptPart(content=["Look at this image:", image_url]) - mock_msg = MagicMock() - mock_msg.parts = [user_part] - mock_msg.instructions = None + 
for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + found_image = True + assert content_item["content"] == expected_content + else: + events = capture_events() - span = ai_client_span([mock_msg], None, None, None) - span.finish() + with sentry_sdk.start_transaction(op="test", name="test"): + image_url = ImageUrl(url=url, **image_url_kwargs) + user_part = UserPromptPart(content=["Look at this image:", image_url]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 - messages_data = _get_messages_from_span(chat_spans[0]["attributes"]) + span = ai_client_span([mock_msg], None, None, None) + span.finish() - found_image = False - for msg in messages_data: - if "content" not in msg: - continue - for content_item in msg["content"]: - if content_item.get("type") == "image": - found_image = True - assert content_item["content"] == expected_content + (event,) = events + chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + messages_data = _get_messages_from_span(chat_spans[0]["data"]) + + for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + found_image = True + assert content_item["content"] == expected_content assert found_image, "Image content item should be found in messages data" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( "url, image_url_kwargs, expected_content", @@ -3032,41 +4142,76 @@ def test_image_url_base64_content_in_span( ], ) async def test_invoke_agent_image_url( - sentry_init, capture_items, url, image_url_kwargs, expected_content + 
sentry_init, + capture_events, + capture_items, + url, + image_url_kwargs, + expected_content, + stream_gen_ai_spans, ): sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) agent = Agent("test", name="test_image_url_agent") - items = capture_items("transaction", "span") image_url = ImageUrl(url=url, **image_url_kwargs) - await agent.run([image_url, "Describe this image"]) - found_image = False + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" - ] - for chat_span in chat_spans: - messages_data = _get_messages_from_span(chat_span["attributes"]) - for msg in messages_data: - if "content" not in msg: - continue - for content_item in msg["content"]: - if content_item.get("type") == "image": - assert content_item["content"] == expected_content - found_image = True + await agent.run([image_url, "Describe this image"]) + + found_image = False + + spans = [item.payload for item in items if item.type == "span"] + chat_spans = [ + s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" + ] + for chat_span in chat_spans: + messages_data = _get_messages_from_span(chat_span["attributes"]) + for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + assert content_item["content"] == expected_content + found_image = True + else: + events = capture_events() + + await agent.run([image_url, "Describe this image"]) + + (transaction,) = events + + found_image = False + + chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] + for chat_span in chat_spans: + messages_data = _get_messages_from_span(chat_span["data"]) + for msg in messages_data: + if "content" not in msg: + 
continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + assert content_item["content"] == expected_content + found_image = True assert found_image, "Image content item should be found in messages data" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_tool_description_in_execute_tool_span(sentry_init, capture_items): +async def test_tool_description_in_execute_tool_span( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): """ Test that tool description from the tool's docstring is included in execute_tool spans. """ @@ -3085,26 +4230,51 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("transaction", "span") + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - result = await agent.run("What is 5 times 3?") - assert result is not None + result = await agent.run("What is 5 times 3?") + assert result is not None - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - tool_spans = [ - s - for s in spans - if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" - ] - assert len(tool_spans) >= 1 + tool_spans = [ + s + for s in spans + if s["attributes"].get("sentry.op", "") == "gen_ai.execute_tool" + ] - tool_span = tool_spans[0] - assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" - assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] - assert ( - "Multiply two numbers" - in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] - ) + assert len(tool_spans) >= 1 + + tool_span = tool_spans[0] + + assert tool_span["attributes"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["attributes"] + assert ( + 
"Multiply two numbers" + in tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) + else: + events = capture_events() + + result = await agent.run("What is 5 times 3?") + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + + tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"] + + assert len(tool_spans) >= 1 + + tool_span = tool_spans[0] + + assert tool_span["data"]["gen_ai.tool.name"] == "multiply_numbers" + assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["data"] + assert ( + "Multiply two numbers" + in tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + ) From ab477839ff4c6eb5a2d39b8a364e678c27621b33 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:15:12 +0200 Subject: [PATCH 46/84] cleanup anthropic --- .../integrations/anthropic/test_anthropic.py | 262 ++++++++++-------- 1 file changed, 149 insertions(+), 113 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 1378f777df..2e240b9c8f 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3125,24 +3125,14 @@ async def test_stream_message_with_input_json_delta_async( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - if stream_gen_ai_spans: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) - else: - assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' - ) - assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - == '{"location": "San Francisco, CA"}' - ) + assert ( + span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + == '[{"role": "user", 
"content": "What is the weather like in San Francisco?"}]' + ) + assert ( + span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == '{"location": "San Francisco, CA"}' + ) else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -3758,20 +3748,45 @@ async def test_anthropic_message_truncation_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(): - await client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] chat_spans = [ span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] + + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + + tx = next(item.payload for item in items if item.type == "transaction") else: + events = capture_events() + + with start_transaction(): + await client.messages.create( + max_tokens=1024, messages=messages, model="model" + ) + assert len(events) > 0 tx = events[0] assert tx["type"] == "transaction" @@ -3779,32 +3794,23 @@ async def test_anthropic_message_truncation_async( chat_spans = [ span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 - chat_span = chat_spans[0] - if stream_gen_ai_spans: - assert 
chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) - if stream_gen_ai_spans: - tx = next(item.payload for item in items if item.type == "transaction") - else: - pass assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -3846,24 +3852,21 @@ def test_nonstreaming_create_message_with_system_prompt( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="anthropic"): - response = client.messages.create( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) - assert response == EXAMPLE_MESSAGE - usage = response.usage + assert response == EXAMPLE_MESSAGE + usage = response.usage - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert 
usage.input_tokens == 10 + assert usage.output_tokens == 20 - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -3909,6 +3912,22 @@ def test_nonstreaming_create_message_with_system_prompt( "end_turn" ] else: + events = capture_events() + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + assert len(events) == 1 (event,) = events @@ -3933,16 +3952,8 @@ def test_nonstreaming_create_message_with_system_prompt( {"type": "text", "content": "You are a helpful assistant."} ] - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" @@ -3998,24 +4009,21 @@ async def test_nonstreaming_create_message_with_system_prompt_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="anthropic"): - response = await client.messages.create( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful 
assistant.", + ) - assert response == EXAMPLE_MESSAGE - usage = response.usage + assert response == EXAMPLE_MESSAGE + usage = response.usage - assert usage.input_tokens == 10 - assert usage.output_tokens == 20 + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -4061,6 +4069,22 @@ async def test_nonstreaming_create_message_with_system_prompt_async( "end_turn" ] else: + events = capture_events() + + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + assert len(events) == 1 (event,) = events @@ -4566,24 +4590,21 @@ async def test_stream_message_with_system_prompt_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _, start_transaction(name="anthropic"): - async with client.messages.stream( - max_tokens=1024, - messages=messages, - model="model", - system="You are a helpful assistant.", - ) as stream: - async for event in stream: - pass + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + async for event in stream: + pass - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -4626,6 +4647,22 @@ async def test_stream_message_with_system_prompt_async( assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + async with client.messages.stream( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) as stream: + async for event in stream: + pass + assert len(events) == 1 (event,) = events @@ -4764,26 +4801,7 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass - else: - events = capture_events() - with mock.patch.object( - client._client, - "send", - return_value=response, - ) as _, start_transaction(name="anthropic"): - message = await client.messages.create( - max_tokens=1024, - messages=messages, - model="model", - stream=True, - system="You are a helpful assistant.", - ) - - async for _ in message: - pass - - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" @@ -4828,6 +4846,24 @@ async def test_streaming_create_message_with_system_prompt_async( assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True else: + events = capture_events() + + with mock.patch.object( + client._client, + "send", + return_value=response, + ) as _, start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) + + async for _ in message: + pass + assert len(events) == 1 (event,) = events From 75f4d3aec9a9353135a5d3564b2503af518a6cc7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:23:32 +0200 Subject: [PATCH 47/84] cleanup google-genai --- .../google_genai/test_google_genai.py | 395 
++++++++++-------- 1 file changed, 219 insertions(+), 176 deletions(-) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 3cc4b42bb2..8da5e7ca22 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -222,10 +222,7 @@ def test_nonstreaming_generate_content( if send_default_pii and include_prompts: # Response text is stored as a JSON array - if stream_gen_ai_spans: - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - else: - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Parse the JSON array response_texts = json.loads(response_text) assert response_texts == ["Hello! How can I help you today?"] @@ -415,40 +412,52 @@ def get_weather(location: str) -> str: if stream_gen_ai_spans: items = capture_items("span") - else: - events = capture_events() - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - config = create_test_config(tools=[get_weather, mock_tool]) - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="What's the weather?", config=config - ) + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config(tools=[get_weather, mock_tool]) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="What's the weather?", config=config + ) - if stream_gen_ai_spans: invoke_span = next(item.payload for item in items if item.type == "span") # Check that tools are recorded (data is serialized as a string) tools_data_str = invoke_span["attributes"][ SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS ] + + # Parse the JSON string to verify content + tools_data = 
json.loads(tools_data_str) + assert len(tools_data) == 2 + + # The order of tools may not be guaranteed, so sort by name and description for comparison + sorted_tools = sorted( + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) + ) else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config(tools=[get_weather, mock_tool]) + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="What's the weather?", config=config + ) + (event,) = events invoke_span = event["spans"][0] # Check that tools are recorded (data is serialized as a string) tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - # Parse the JSON string to verify content - tools_data = json.loads(tools_data_str) - assert len(tools_data) == 2 - # The order of tools may not be guaranteed, so sort by name and description for comparison - if stream_gen_ai_spans: - sorted_tools = sorted( - tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) - ) - else: + # Parse the JSON string to verify content + tools_data = json.loads(tools_data_str) + assert len(tools_data) == 2 + + # The order of tools may not be guaranteed, so sort by name and description for comparison sorted_tools = sorted( tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) ) @@ -488,16 +497,13 @@ def get_weather(location: str) -> str: if stream_gen_ai_spans: items = capture_items("span") - else: - events = capture_events() - # Execute the wrapped tool - with start_transaction(name="test_tool"): - result = wrapped_weather("San Francisco") + # Execute the wrapped tool + with start_transaction(name="test_tool"): + result = wrapped_weather("San Francisco") - assert result == "The weather in San Francisco is sunny" + assert result == "The weather in San Francisco is sunny" - if stream_gen_ai_spans: spans = 
[item.payload for item in items if item.type == "span"] assert len(spans) == 1 tool_span = next(item.payload for item in items if item.type == "span") @@ -510,6 +516,14 @@ def get_weather(location: str) -> str: == "Get the weather for a location" ) else: + events = capture_events() + + # Execute the wrapped tool + with start_transaction(name="test_tool"): + result = wrapped_weather("San Francisco") + + assert result == "The weather in San Francisco is sunny" + (event,) = events assert len(event["spans"]) == 1 tool_span = event["spans"][0] @@ -538,24 +552,35 @@ def test_error_handling( ) if stream_gen_ai_spans: items = capture_items("event", "transaction") - else: - events = capture_events() - # Mock an error at the HTTP level - with mock.patch.object( - mock_genai_client._api_client, "request", side_effect=Exception("API Error") - ), start_transaction(name="google_genai"), pytest.raises( - Exception, match="API Error" - ): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents="This will fail", - config=create_test_config(), - ) + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, "request", side_effect=Exception("API Error") + ), start_transaction(name="google_genai"), pytest.raises( + Exception, match="API Error" + ): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="This will fail", + config=create_test_config(), + ) - if stream_gen_ai_spans: (error_event,) = (item.payload for item in items if item.type == "event") else: + events = capture_events() + + # Mock an error at the HTTP level + with mock.patch.object( + mock_genai_client._api_client, "request", side_effect=Exception("API Error") + ), start_transaction(name="google_genai"), pytest.raises( + Exception, match="API Error" + ): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", + contents="This will fail", + config=create_test_config(), + ) + # Should have both transaction and 
error events assert len(events) == 2 error_event, transaction_event = events @@ -646,58 +671,48 @@ def test_streaming_generate_content( if stream_gen_ai_spans: items = capture_items("span") - else: - events = capture_events() - with mock.patch.object( - mock_genai_client._api_client, "request_streamed", return_value=mock_stream - ), start_transaction(name="google_genai"): - config = create_test_config() - stream = mock_genai_client.models.generate_content_stream( - model="gemini-1.5-flash", contents="Stream me a response", config=config - ) + with mock.patch.object( + mock_genai_client._api_client, "request_streamed", return_value=mock_stream + ), start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", contents="Stream me a response", config=config + ) - # Consume the stream (this is what users do with the integration wrapper) - collected_chunks = list(stream) + # Consume the stream (this is what users do with the integration wrapper) + collected_chunks = list(stream) - # Verify we got all chunks - assert len(collected_chunks) == 3 - assert collected_chunks[0].candidates[0].content.parts[0].text == "Hello! " - assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " - assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" + # Verify we got all chunks + assert len(collected_chunks) == 3 + assert collected_chunks[0].candidates[0].content.parts[0].text == "Hello! " + assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " + assert ( + collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" 
+ ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 chat_span = next(item.payload for item in items if item.type == "span") # Check that streaming flag is set on both spans assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - else: - (event,) = events - - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] - - # Check that streaming flag is set on both spans - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - # Verify accumulated response text (all chunks combined) - expected_full_text = "Hello! How can I help you today?" - # Response text is stored as a JSON string - if stream_gen_ai_spans: - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - else: - chat_response_text = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - assert chat_response_text == [expected_full_text] + # Verify accumulated response text (all chunks combined) + expected_full_text = "Hello! How can I help you today?" 
+ # Response text is stored as a JSON string + if stream_gen_ai_spans: + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + else: + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + assert chat_response_text == [expected_full_text] - # Verify finish reasons (only the final chunk has a finish reason) - # When there's a single finish reason, it's stored as a plain string (not JSON) - if stream_gen_ai_spans: + # Verify finish reasons (only the final chunk has a finish reason) + # When there's a single finish reason, it's stored as a plain string (not JSON) assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] assert ( chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" @@ -715,6 +730,50 @@ def test_streaming_generate_content( chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" ) else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request_streamed", return_value=mock_stream + ), start_transaction(name="google_genai"): + config = create_test_config() + stream = mock_genai_client.models.generate_content_stream( + model="gemini-1.5-flash", contents="Stream me a response", config=config + ) + + # Consume the stream (this is what users do with the integration wrapper) + collected_chunks = list(stream) + + # Verify we got all chunks + assert len(collected_chunks) == 3 + assert collected_chunks[0].candidates[0].content.parts[0].text == "Hello! " + assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " + assert ( + collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" 
+ ) + + (event,) = events + + assert len(event["spans"]) == 1 + chat_span = event["spans"][0] + + # Check that streaming flag is set on both spans + assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + # Verify accumulated response text (all chunks combined) + expected_full_text = "Hello! How can I help you today?" + # Response text is stored as a JSON string + if stream_gen_ai_spans: + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + else: + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + assert chat_response_text == [expected_full_text] + + # Verify finish reasons (only the final chunk has a finish reason) + # When there's a single finish reason, it's stored as a plain string (not JSON) assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 @@ -745,18 +804,15 @@ def test_span_origin( if stream_gen_ai_spans: items = capture_items("span", "transaction") - else: - events = capture_events() - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - config = create_test_config() - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="Test origin", config=config - ) + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test origin", config=config + ) - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" @@ -764,6 +820,16 @@ def test_span_origin( for span in spans: assert 
span["attributes"]["sentry.origin"] == "auto.ai.google_genai" else: + events = capture_events() + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ), start_transaction(name="google_genai"): + config = create_test_config() + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents="Test origin", config=config + ) + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -893,6 +959,23 @@ def test_multiple_candidates( ) chat_span = next(item.payload for item in items if item.type == "span") + + # Should capture all responses + # Response text is stored as a JSON string when there are multiple responses + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + if isinstance(response_text, str) and response_text.startswith("["): + # It's a JSON array + response_list = json.loads(response_text) + assert response_list == ["Response 1", "Response 2"] + else: + # It's concatenated + assert response_text == "Response 1\nResponse 2" + + # Finish reasons are serialized as JSON + finish_reasons = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + ) else: events = capture_events() @@ -907,26 +990,19 @@ def test_multiple_candidates( (event,) = events chat_span = event["spans"][0] - # Should capture all responses - # Response text is stored as a JSON string when there are multiple responses - if stream_gen_ai_spans: - response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - else: + # Should capture all responses + # Response text is stored as a JSON string when there are multiple responses response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - if isinstance(response_text, str) and response_text.startswith("["): - # It's a JSON array - response_list = json.loads(response_text) - assert response_list == ["Response 1", "Response 2"] - else: - # It's concatenated - assert response_text == "Response 1\nResponse 
2" - # Finish reasons are serialized as JSON - if stream_gen_ai_spans: - finish_reasons = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] - ) - else: + if isinstance(response_text, str) and response_text.startswith("["): + # It's a JSON array + response_list = json.loads(response_text) + assert response_list == ["Response 1", "Response 2"] + else: + # It's concatenated + assert response_text == "Response 1\nResponse 2" + + # Finish reasons are serialized as JSON finish_reasons = json.loads( chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] ) @@ -1482,34 +1558,20 @@ def test_embed_content( # Check input texts if PII is allowed if send_default_pii and include_prompts: - if stream_gen_ai_spans: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) - else: - input_texts = json.loads( - embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] - else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 - ) - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 else: events = capture_events() 
@@ -1546,34 +1608,20 @@ def test_embed_content( # Check input texts if PII is allowed if send_default_pii and include_prompts: - if stream_gen_ai_spans: - input_texts = json.loads( - embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) - else: - input_texts = json.loads( - embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - ) + input_texts = json.loads( + embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] - else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert ( - embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 - ) - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1631,12 +1679,8 @@ def test_embed_content_string_input( assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if 
SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 else: events = capture_events() @@ -1657,12 +1701,8 @@ def test_embed_content_string_input( assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - else: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: + assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2024,6 +2064,12 @@ async def test_async_embed_content_string_input( input_texts = json.loads( embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] ) + + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 else: events = capture_events() @@ -2043,13 +2089,9 @@ async def test_async_embed_content_string_input( # Check that single string is handled correctly input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] - # Should use token_count from statistics (5), not billable_character_count (10) - # Note: Only available in newer versions with ContentEmbeddingStatistics - if stream_gen_ai_spans: - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: - assert 
embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 - else: + assert input_texts == ["Single text input"] + # Should use token_count from statistics (5), not billable_character_count (10) + # Note: Only available in newer versions with ContentEmbeddingStatistics if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @@ -2158,6 +2200,10 @@ async def test_async_embed_content_without_statistics( spans = [item.payload for item in items if item.type == "span"] (embed_span,) = spans + + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] else: events = capture_events() @@ -2174,11 +2220,8 @@ async def test_async_embed_content_without_statistics( (event,) = events (embed_span,) = event["spans"] - # No usage tokens since there are no statistics in older versions - # This is expected and the integration should handle it gracefully - if stream_gen_ai_spans: - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] - else: + # No usage tokens since there are no statistics in older versions + # This is expected and the integration should handle it gracefully assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] From 8ba3d94dcbb050d5011a1d9b4b6173324ecde48b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:28:21 +0200 Subject: [PATCH 48/84] cleanup huggingface-hub --- .../huggingface_hub/test_huggingface_hub.py | 159 ++++++++---------- 1 file changed, 72 insertions(+), 87 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index d691a58c31..2d94082e7b 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ 
b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -651,34 +651,23 @@ def test_text_generation_streaming( assert span["name"] == "text_completion test-model" assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - if stream_gen_ai_spans: - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - else: - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "sentry.environment": "production", + "sentry.op": "gen_ai.text_completion", + "sentry.origin": "auto.ai.huggingface_hub", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" @@ -721,34 +710,15 @@ def test_text_generation_streaming( assert span["description"] == "text_completion test-model" assert span["origin"] == "auto.ai.huggingface_hub" - if stream_gen_ai_spans: - 
expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "sentry.environment": "production", - "sentry.op": "gen_ai.text_completion", - "sentry.origin": "auto.ai.huggingface_hub", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - else: - expected_data = { - "gen_ai.operation.name": "text_completion", - "gen_ai.request.model": "test-model", - "gen_ai.response.finish_reasons": "length", - "gen_ai.response.streaming": True, - "gen_ai.usage.total_tokens": 10, - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + expected_data = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" @@ -783,20 +753,18 @@ def test_chat_completion( integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - else: - events = capture_events() client = get_hf_provider_inference_client() - with sentry_sdk.start_transaction(name="test"): - client.chat_completion( - messages=[{"role": "user", "content": "Hello!"}], - stream=False, - ) - if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + stream=False, + ) + spans = [item.payload for item in items if item.type == 
"span"] span = None for sp in spans: @@ -849,6 +817,14 @@ def test_chat_completion( assert span["attributes"] == expected_data else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + stream=False, + ) + (transaction,) = events span = None @@ -863,14 +839,9 @@ def test_chat_completion( assert span is not None - if stream_gen_ai_spans: - assert span["attributes"]["sentry.op"] == "gen_ai.chat" - assert span["name"] == "chat test-model" - assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" - else: - assert span["op"] == "gen_ai.chat" - assert span["description"] == "chat test-model" - assert span["origin"] == "auto.ai.huggingface_hub" + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -1397,10 +1368,6 @@ def test_chat_completion_streaming_with_tools( integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - else: - events = capture_events() client = get_hf_provider_inference_client() @@ -1419,17 +1386,21 @@ def test_chat_completion_streaming_with_tools( } ] - with sentry_sdk.start_transaction(name="test"): - _ = list( - client.chat_completion( - messages=[{"role": "user", "content": "What is the weather in Paris?"}], - stream=True, - tools=tools, - tool_choice="auto", + if stream_gen_ai_spans: + items = capture_items("transaction", "span") + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + messages=[ + {"role": "user", "content": "What is the weather in Paris?"} + ], + stream=True, + tools=tools, + tool_choice="auto", + ) ) - ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == 
"span"] span = None for sp in spans: @@ -1487,6 +1458,20 @@ def test_chat_completion_streaming_with_tools( assert span["attributes"] == expected_data else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + messages=[ + {"role": "user", "content": "What is the weather in Paris?"} + ], + stream=True, + tools=tools, + tool_choice="auto", + ) + ) + (transaction,) = events span = None From f156e9295d45205bae45072015b1f2d82e68f70b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:31:12 +0200 Subject: [PATCH 49/84] cleanup langgraph --- .../integrations/langgraph/test_langgraph.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 0052fefa29..991c1f2269 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -171,12 +171,11 @@ def original_compile(self, *args, **kwargs): if stream_gen_ai_spans: items = capture_items("transaction", "span") - with patch("sentry_sdk.integrations.langgraph.StateGraph"): - with start_transaction(): - wrapped_compile = _wrap_state_graph_compile(original_compile) - compiled_graph = wrapped_compile( - graph, model="test-model", checkpointer=None - ) + with patch("sentry_sdk.integrations.langgraph.StateGraph"), start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) assert compiled_graph is not None assert compiled_graph.name == "test_graph" @@ -209,12 +208,11 @@ def original_compile(self, *args, **kwargs): else: events = capture_events() - with patch("sentry_sdk.integrations.langgraph.StateGraph"): - with start_transaction(): - wrapped_compile = _wrap_state_graph_compile(original_compile) - compiled_graph = wrapped_compile( - graph, 
model="test-model", checkpointer=None - ) + with patch("sentry_sdk.integrations.langgraph.StateGraph"), start_transaction(): + wrapped_compile = _wrap_state_graph_compile(original_compile) + compiled_graph = wrapped_compile( + graph, model="test-model", checkpointer=None + ) assert compiled_graph is not None assert compiled_graph.name == "test_graph" From 3b03ddf4277132c2e963987f4ae073ae42ae32c9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:35:04 +0200 Subject: [PATCH 50/84] cleanup litellm --- tests/integrations/litellm/test_litellm.py | 75 +++++++++++++--------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index ab60779ed6..8ae8dca99e 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -324,24 +324,21 @@ async def test_async_nonstreaming_chat_completion( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with mock.patch.object( - client.completions._client._client, - "send", - return_value=model_response, - ), start_transaction(name="litellm test"): - await litellm.acompletion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) - await GLOBAL_LOGGING_WORKER.flush() - await asyncio.sleep(0.5) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "litellm test" @@ -373,6 +370,22 @@ async def test_async_nonstreaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 else: + events = capture_events() + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ), start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + assert len(events) == 1 (event,) = events @@ -2385,6 +2398,13 @@ def test_litellm_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") else: events = capture_events() @@ -2415,16 +2435,12 @@ def test_litellm_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - if stream_gen_ai_spans: - tx = next(item.payload for item in items if item.type == "transaction") - else: - pass + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -2857,6 +2873,10 @@ async def test_async_binary_content_encoding_mixed_content( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) 
+ + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -2882,11 +2902,8 @@ async def test_async_binary_content_encoding_mixed_content( if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 - span = chat_spans[0] - if stream_gen_ai_spans: - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - else: + assert len(chat_spans) == 1 + span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content_items = [ From 261b9f0c3db16e4c9371667e73d6d53a717b0dad Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 14:41:54 +0200 Subject: [PATCH 51/84] cleanup openai --- tests/integrations/openai/test_openai.py | 1011 ++++++++++++++-------- 1 file changed, 670 insertions(+), 341 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c80b2df513..0da39e842d 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -3039,24 +3039,31 @@ def test_span_origin_streaming_chat( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - client.chat.completions._post = mock.Mock(return_value=returned_stream) - with start_transaction(name="openai tx"): - response_stream = client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) - "".join(map(lambda x: x.choices[0].delta.content, response_stream)) + "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - if stream_gen_ai_spans: 
(event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + "".join(map(lambda x: x.choices[0].delta.content, response_stream)) + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -3124,25 +3131,33 @@ async def test_span_origin_streaming_chat_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="openai tx"): - response_stream = await client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - async for _ in response_stream: - pass + with start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + async for _ in response_stream: + pass - # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) + # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - if stream_gen_ai_spans: (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + with start_transaction(name="openai tx"): + response_stream = await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + async for _ in response_stream: + pass + + # 
"".join(map(lambda x: x.choices[0].delta.content, response_stream)) + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -3178,19 +3193,21 @@ def test_span_origin_embeddings( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="openai tx"): - client.embeddings.create(input="hello", model="text-embedding-3-large") + with start_transaction(name="openai tx"): + client.embeddings.create(input="hello", model="text-embedding-3-large") - if stream_gen_ai_spans: (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + with start_transaction(name="openai tx"): + client.embeddings.create(input="hello", model="text-embedding-3-large") + (event,) = events assert event["contexts"]["trace"]["origin"] == "manual" @@ -3227,19 +3244,25 @@ async def test_span_origin_embeddings_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - with start_transaction(name="openai tx"): - await client.embeddings.create(input="hello", model="text-embedding-3-large") + with start_transaction(name="openai tx"): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) - if stream_gen_ai_spans: (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: + events = capture_events() + + with start_transaction(name="openai tx"): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + (event,) = events assert 
event["contexts"]["trace"]["origin"] == "manual" @@ -3840,6 +3863,162 @@ def test_ai_client_span_responses_api( "thread.id": mock.ANY, "thread.name": mock.ANY, } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + 
"gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", 
"content": "hello"}] + ), + } + ) + + assert spans[0]["attributes"] == expected_data else: events = capture_events() @@ -3879,163 +4058,160 @@ def test_ai_client_span_responses_api( "thread.name": mock.ANY, } - param_id = request.node.callspec.id - if "string" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "string" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - } - ] - ), - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "blocks_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id: - 
expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif instructions is None or isinstance(instructions, Omit): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - else: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - 
[{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding 
assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) - if stream_gen_ai_spans: - assert spans[0]["attributes"] == 
expected_data - else: assert spans[0]["data"] == expected_data @@ -4299,6 +4475,162 @@ async def test_ai_client_span_responses_async_api( "thread.id": mock.ANY, "thread.name": mock.ANY, } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + 
expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + 
[{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["attributes"] == expected_data else: events = capture_events() @@ -4339,163 +4671,160 @@ async def test_ai_client_span_responses_async_api( "thread.name": mock.ANY, } - param_id = request.node.callspec.id - if "string" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "string" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - } - ] - ), - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "blocks_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": 
"hello"}] - ), - } - ) - elif "blocks" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif instructions is None or isinstance(instructions, Omit): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - else: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - 
"gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": 
"text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) - if stream_gen_ai_spans: - 
assert spans[0]["attributes"] == expected_data - else: assert spans[0]["data"] == expected_data From 4f8a4c80a2b612b9220ed399ab96da2ec22db096 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:09:01 +0200 Subject: [PATCH 52/84] cleanup openai_agents --- .../openai_agents/test_openai_agents.py | 2474 +++++++++-------- 1 file changed, 1296 insertions(+), 1178 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 46196893d8..60f88cd7f4 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -427,42 +427,10 @@ async def test_agent_invocation_span( invoke_agent_span, ai_client_span = spans assert invoke_agent_span["name"] == "invoke_agent test_agent" - else: - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - events = capture_events() - - result = await agents.Runner.run( - agent, - input, - run_config=test_run_config, - ) - - assert result is not None - assert result.final_output == "Hello, how can I help you?" - - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert invoke_agent_span["description"] == "invoke_agent test_agent" - - # Only first case checks "gen_ai.request.messages" until further input handling work. - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore - if stream_gen_ai_spans: + # Only first case checks "gen_ai.request.messages" until further input handling work. 
+ param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore assert "gen_ai.system_instructions" not in ai_client_span["attributes"] assert invoke_agent_span["attributes"][ @@ -475,34 +443,27 @@ async def test_agent_invocation_span( }, ] ) - else: - assert "gen_ai.system_instructions" not in ai_client_span["data"] - - assert invoke_agent_span["data"][ - "gen_ai.request.messages" + elif "string" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" ] == safe_serialize( [ { - "content": [{"text": "Test input", "type": "text"}], - "role": "user", + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", }, ] ) - - elif "string" in param_id: - if stream_gen_ai_spans: + elif "blocks_no_type" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, + {"type": "text", "content": "You are a helpful assistant."}, ] ) - else: - assert ai_client_span["data"][ + elif "blocks_no_type" in param_id: + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -510,10 +471,10 @@ async def test_agent_invocation_span( "type": "text", "content": "You are a coding assistant that talks like a pirate.", }, + {"type": "text", "content": "You are a helpful assistant."}, ] ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - if stream_gen_ai_spans: + elif "blocks" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( @@ -521,29 +482,29 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) - else: - assert ai_client_span["data"][ + elif "blocks" in param_id: + assert ai_client_span["attributes"][ 
"gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, ] ) - elif "blocks_no_type" in param_id: - if stream_gen_ai_spans: + elif "parts_no_type" in param_id and instructions is None: assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) - else: - assert ai_client_span["data"][ + elif "parts_no_type" in param_id: + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -552,39 +513,105 @@ async def test_agent_invocation_span( "content": "You are a coding assistant that talks like a pirate.", }, {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) - elif "blocks" in param_id and instructions is None: # type: ignore - if stream_gen_ai_spans: + elif instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) else: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, ] ) - elif "blocks" in param_id: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" + + assert ( + invoke_agent_span["attributes"]["gen_ai.response.text"] + == "Hello, how can I help you?" 
+ ) + + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + + # Only first case checks "gen_ai.request.messages" until further input handling work. + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["data"] + + assert invoke_agent_span["data"][ + "gen_ai.request.messages" ] == safe_serialize( [ { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", + "content": [{"text": "Test input", "type": "text"}], + "role": "user", }, - {"type": "text", "content": "You are a helpful assistant."}, ] ) - else: + + elif "string" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( @@ -593,43 +620,37 @@ async def test_agent_invocation_span( "type": "text", "content": "You are a coding assistant that talks like a pirate.", }, - {"type": "text", "content": "You are a helpful assistant."}, ] ) - elif "parts_no_type" in param_id and instructions is None: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - else: + elif "blocks_no_type" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - elif 
"parts_no_type" in param_id: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - else: + elif "blocks" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( @@ -639,12 +660,10 @@ async def test_agent_invocation_span( "content": "You are a coding assistant that talks like a pirate.", }, {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, ] ) - elif instructions is None: # type: ignore - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -652,25 +671,24 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) - else: + elif "parts_no_type" in param_id: assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] ) - else: - if stream_gen_ai_spans: - assert ai_client_span["attributes"][ + elif instructions is None: # type: ignore + assert ai_client_span["data"][ "gen_ai.system_instructions" ] == safe_serialize( [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, {"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}, ] @@ -689,37 +707,11 @@ async def 
test_agent_invocation_span( ] ) - if stream_gen_ai_spans: - assert ( - invoke_agent_span["attributes"]["gen_ai.response.text"] - == "Hello, how can I help you?" - ) - else: assert ( invoke_agent_span["data"]["gen_ai.response.text"] == "Hello, how can I help you?" ) - if stream_gen_ai_spans: - assert ( - invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - else: assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" assert invoke_agent_span["data"]["gen_ai.system"] == "openai" assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" @@ -782,18 +774,13 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert ai_client_span["name"] == "chat my-custom-model" - assert ( - ai_client_span["attributes"]["gen_ai.request.model"] - == "my-custom-model" - ) + assert ai_client_span["name"] == "chat my-custom-model" + assert ai_client_span["attributes"]["gen_ai.request.model"] == "my-custom-model" else: with patch.object( agent.model._client._client, @@ -814,14 +801,12 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - assert ai_client_span["description"] == "chat my-custom-model" - assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" + assert ai_client_span["description"] == "chat my-custom-model" + assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -867,50 +852,44 @@ def test_agent_invocation_span_sync_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert ( - invoke_agent_span["attributes"]["gen_ai.operation.name"] - == "invoke_agent" - ) - assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" - assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["name"] == "chat gpt-4" - assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 - 
assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS - not in invoke_agent_span["attributes"] - ) + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) else: with patch.object( agent.model._client._client, @@ -932,37 +911,35 @@ def test_agent_invocation_span_sync_no_pii( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == 
"chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1485,19 +1462,17 @@ async def test_handoff_span( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["name"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert 
handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" else: with patch.object( primary_agent.model._client._client, @@ -1519,19 +1494,19 @@ async def test_handoff_span( assert result is not None - (transaction,) = events - spans = transaction["spans"] - handoff_span = next( - span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF - ) + (transaction,) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["description"] - == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1657,19 +1632,17 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["name"] == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" else: with patch.object( 
primary_agent.model._client._client, @@ -1691,19 +1664,19 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - (error, transaction) = events - spans = transaction["spans"] - handoff_span = next( - span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF - ) + (error, transaction) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) - # Verify handoff span was created - assert handoff_span is not None - assert ( - handoff_span["description"] - == "handoff from primary_agent to secondary_agent" - ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -1773,30 +1746,27 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("transaction", "span") - else: - events = capture_events() - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", 
+ run_config=test_run_config, + ) - if stream_gen_ai_spans: (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" @@ -1815,56 +1785,45 @@ def simple_test_tool(message: str) -> str: for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - else: - (transaction,) = events - spans = transaction["spans"] - agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + 
} + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) - if stream_gen_ai_spans: assert agent_span["name"] == "invoke_agent test_agent" assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" @@ -1873,21 +1832,9 @@ def simple_test_tool(message: str) -> str: agent_span_available_tool = json.loads( agent_span["attributes"]["gen_ai.request.available_tools"] )[0] - else: - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - if stream_gen_ai_spans: assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 @@ -1902,26 +1849,11 @@ def simple_test_tool(message: str) -> str: ai_client_span1_available_tool = json.loads( ai_client_span1["attributes"]["gen_ai.request.available_tools"] )[0] - else: - assert 
agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) - if stream_gen_ai_spans: assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span1["attributes"][ "gen_ai.request.messages" @@ -1945,49 +1877,23 @@ def simple_test_tool(message: str) -> str: ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 ) assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 - else: - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] 
== 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None - if stream_gen_ai_spans: assert json.loads( ai_client_span1["attributes"]["gen_ai.response.tool_calls"] ) == [tool_call] - else: - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] - if stream_gen_ai_spans: assert tool_span["name"] == "execute_tool simple_test_tool" assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" @@ -1995,17 +1901,9 @@ def simple_test_tool(message: str) -> str: tool_span_available_tool = json.loads( tool_span["attributes"]["gen_ai.request.available_tools"] )[0] - else: - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - if stream_gen_ai_spans: assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 @@ -2021,10 +1919,192 @@ def simple_test_tool(message: str) -> str: assert 
ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Please use 
the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert 
agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert 
tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] )[0] - else: + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 @@ -2041,44 +2121,10 @@ def simple_test_tool(message: str) -> str: ai_client_span2_available_tool = json.loads( ai_client_span2["data"]["gen_ai.request.available_tools"] )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) - - if stream_gen_ai_spans: - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ( - ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - else: + assert 
ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( [ @@ -2386,117 +2432,113 @@ def simple_test_tool(message: str) -> str: agent_with_tool = test_agent.clone(tools=[simple_test_tool]) if stream_gen_ai_spans: - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a mock response that includes tool calls - tool_call = ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="wrong_tool", - type="function_call", - arguments='{"message": "hello"}', - ) + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - tool_response = ModelResponse( - output=[tool_call], - usage=Usage( - requests=1, input_tokens=10, output_tokens=5, total_tokens=15 - ), - response_id="resp_tool_123", - ) + mock_get_response.side_effect = [tool_response] - mock_get_response.side_effect = [tool_response] + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + items = capture_items("span", "transaction") - items = capture_items("span", "transaction") + with pytest.raises(ModelBehaviorError): + 
await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) - with pytest.raises(ModelBehaviorError): - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + spans = [item.payload for item in items if item.type == "span"] - spans = [item.payload for item in items if item.type == "span"] + ( + agent_span, + ai_client_span1, + ) = spans + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - ( - agent_span, - ai_client_span1, - ) = spans - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - - # Error due to unrecognized tool in model response. - assert agent_span["status"] == "error" + # Error due to unrecognized tool in model response. 
+ assert agent_span["status"] == "error" else: - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a mock response that includes tool calls - tool_call = ResponseFunctionToolCall( - id="call_123", - call_id="call_123", - name="wrong_tool", - type="function_call", - arguments='{"message": "hello"}', - ) + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) - tool_response = ModelResponse( - output=[tool_call], - usage=Usage( - requests=1, input_tokens=10, output_tokens=5, total_tokens=15 - ), - response_id="resp_tool_123", - ) + mock_get_response.side_effect = [tool_response] - mock_get_response.side_effect = [tool_response] + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, ) - events = capture_events() - with pytest.raises(ModelBehaviorError): - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) - - (error, 
transaction) = events - spans = transaction["spans"] - ( - agent_span, - ai_client_span1, - ) = spans + (error, transaction) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + ) = spans - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" - # Error due to unrecognized tool in model response. - assert agent_span["status"] == "internal_error" - assert agent_span["tags"]["status"] == "internal_error" + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "internal_error" + assert agent_span["tags"]["status"] == "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2512,8 +2554,8 @@ async def test_error_handling( Test error handling in agent execution. 
""" - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( + if stream_gen_ai_spans: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: mock_get_response.side_effect = Exception("Model Error") @@ -2527,86 +2569,84 @@ async def test_error_handling( _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("event", "span", "transaction") + items = capture_items("event", "span", "transaction") - with pytest.raises(Exception, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) - - (error_event,) = ( - item.payload for item in items if item.type == "event" + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config ) - assert error_event["exception"]["values"][0]["type"] == "Exception" - assert error_event["exception"]["values"][0]["value"] == "Model Error" - assert ( - error_event["exception"]["values"][0]["mechanism"]["type"] - == "openai_agents" - ) + (error_event,) = (item.payload for item in items if item.type == "event") - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) - assert transaction["transaction"] == "test_agent workflow" - assert ( - transaction["contexts"]["trace"]["origin"] - == "auto.ai.openai_agents" - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - (invoke_agent_span, ai_client_span) = spans + assert transaction["transaction"] == "test_agent workflow" + assert 
transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert ( - invoke_agent_span["attributes"]["sentry.origin"] - == "auto.ai.openai_agents" - ) + spans = [item.payload for item in items if item.type == "span"] + (invoke_agent_span, ai_client_span) = spans - assert ai_client_span["name"] == "chat gpt-4" - assert ( - ai_client_span["attributes"]["sentry.origin"] - == "auto.ai.openai_agents" - ) - assert ai_client_span["status"] == "error" - else: - events = capture_events() - - with pytest.raises(Exception, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) - - ( - error_event, - transaction, - ) = events - - assert error_event["exception"]["values"][0]["type"] == "Exception" - assert error_event["exception"]["values"][0]["value"] == "Model Error" - assert ( - error_event["exception"]["values"][0]["mechanism"]["type"] - == "openai_agents" - ) + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + ) - spans = transaction["spans"] - (invoke_agent_span, ai_client_span) = spans + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" + else: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = Exception("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() - assert transaction["transaction"] == "test_agent workflow" - assert ( - transaction["contexts"]["trace"]["origin"] - == 
"auto.ai.openai_agents" + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config ) - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) + + spans = transaction["spans"] + (invoke_agent_span, ai_client_span) = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["origin"] == "auto.ai.openai_agents" - assert ai_client_span["status"] == "internal_error" - assert ai_client_span["tags"]["status"] == "internal_error" + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "internal_error" + assert ai_client_span["tags"]["status"] == "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2636,41 +2676,32 @@ async def test_error_captures_input_data( request=model_request, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[ - OpenAIAgentsIntegration(), - LoggingIntegration(event_level=logging.CRITICAL), - ], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + 
return_value=response, + ) as _: + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("event", "span") - else: - events = capture_events() - with pytest.raises(InternalServerError, match="Error code: 500"): - await agents.Runner.run(agent, "Test input", run_config=test_run_config) + with pytest.raises(InternalServerError, match="Error code: 500"): + await agents.Runner.run(agent, "Test input", run_config=test_run_config) - if stream_gen_ai_spans: (error_event,) = (item.payload for item in items if item.type == "event") - else: - ( - error_event, - transaction, - ) = events - assert error_event["exception"]["values"][0]["type"] == "InternalServerError" - assert error_event["exception"]["values"][0]["value"] == "Error code: 500" + assert error_event["exception"]["values"][0]["type"] == "InternalServerError" + assert error_event["exception"]["values"][0]["value"] == "Error code: 500" - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] ai_client_span = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -2680,7 +2711,43 @@ async def test_error_captures_input_data( assert ai_client_span["status"] == "error" assert "gen_ai.request.messages" in ai_client_span["attributes"] + request_messages = safe_serialize( + [ + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + ) + assert ( + ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + ) else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + send_default_pii=True, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() + + with pytest.raises(InternalServerError, match="Error code: 500"): + await agents.Runner.run(agent, "Test input", run_config=test_run_config) + + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "InternalServerError" + assert error_event["exception"]["values"][0]["value"] == "Error code: 500" + spans = transaction["spans"] ai_client_span = [s for s in spans if s["op"] == "gen_ai.chat"][0] @@ -2689,16 +2756,11 @@ async def test_error_captures_input_data( assert ai_client_span["tags"]["status"] == "internal_error" assert "gen_ai.request.messages" in ai_client_span["data"] - request_messages = safe_serialize( - [ - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - ) - if stream_gen_ai_spans: - assert ( - ai_client_span["attributes"]["gen_ai.request.messages"] == request_messages + request_messages = safe_serialize( + [ + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] ) - else: assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages @@ -2711,8 +2773,8 @@ async def test_span_status_error( test_agent, stream_gen_ai_spans, ): - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( + if stream_gen_ai_spans: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: mock_get_response.side_effect = ValueError("Model Error") @@ -2726,35 +2788,46 @@ async def test_span_status_error( _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: - items = capture_items("event", "transaction", "span") + items = capture_items("event", "transaction", "span") - with pytest.raises(ValueError, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + with 
pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) - (error,) = (item.payload for item in items if item.type == "event") - assert error["level"] == "error" + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] - assert spans[0]["status"] == "error" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) - else: - events = capture_events() + (transaction,) = (item.payload for item in items if item.type == "transaction") + else: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = ValueError("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + events = capture_events() - with pytest.raises(ValueError, match="Model Error"): - await agents.Runner.run( - test_agent, "Test input", run_config=test_run_config - ) + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) - (error, transaction) = events - assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + (error, transaction) = events + assert error["level"] == "error" + assert transaction["spans"][0]["status"] == "internal_error" + assert transaction["spans"][0]["tags"]["status"] == "internal_error" assert transaction["contexts"]["trace"]["status"] == "internal_error" @@ -2850,19 
+2923,19 @@ async def test_mcp_tool_execution_spans( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") await agents.Runner.run( @@ -2871,32 +2944,43 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.input"] - == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert 
mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.input"] + == '{"query": "search term"}' + ) + assert ( + mcp_tool_span["attributes"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "error" - assert mcp_tool_span.get("tags", {}).get("status") != "error" - else: + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "error" + assert mcp_tool_span.get("tags", {}).get("status") != "error" + else: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() await agents.Runner.run( @@ -2905,31 +2989,29 @@ async def test_mcp_tool_execution_spans( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break + + # Verify the MCP tool span was created + 
assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert ( + mcp_tool_span["data"]["gen_ai.tool.output"] + == "MCP tool executed successfully" + ) - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" + # Verify no error status since error was None + assert mcp_tool_span.get("status") != "internal_error" + assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3023,30 +3105,27 @@ async def test_mcp_tool_execution_with_error( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") - else: - events = capture_events() - await agents.Runner.run( - agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) - if stream_gen_ai_spans: spans = [item.payload for item in items if item.type == "span"] # Find the MCP execute_tool span with error @@ -3066,6 +3145,25 @@ async 
def test_mcp_tool_execution_with_error( # Verify error status was set assert mcp_tool_span["status"] == "error" else: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + await agents.Runner.run( + agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) + (transaction,) = events spans = transaction["spans"] @@ -3179,19 +3277,19 @@ async def test_mcp_tool_execution_without_pii( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") await agents.Runner.run( @@ -3200,24 +3298,35 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("name") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break 
- # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] - assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] - else: + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] + assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] + else: + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() await agents.Runner.run( @@ -3226,24 +3335,24 @@ async def test_mcp_tool_execution_without_pii( run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if span.get("description") == "execute_tool test_mcp_tool": + mcp_tool_span = span + break - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is 
not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["data"] + assert "gen_ai.tool.output" not in mcp_tool_span["data"] @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3269,50 +3378,67 @@ async def test_multiple_agents_asyncio( nonstreaming_responses_model_response, serialize_pydantic=True ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - async def run(): - await agents.Runner.run( - starting_agent=agent, - input="Test input", - run_config=test_run_config, + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) + await asyncio.gather(*[run() for _ in range(3)]) - txn1, txn2, txn3 = ( - item.payload for item in items if 
item.type == "transaction" - ) + txn1, txn2, txn3 = ( + item.payload for item in items if item.type == "transaction" + ) - assert txn1["transaction"] == "test_agent workflow" - assert txn2["transaction"] == "test_agent workflow" - else: + assert txn1["transaction"] == "test_agent workflow" + assert txn2["transaction"] == "test_agent workflow" + assert txn3["transaction"] == "test_agent workflow" + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() - await asyncio.gather(*[run() for _ in range(3)]) + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) - assert len(events) == 3 - txn1, txn2, txn3 = events + await asyncio.gather(*[run() for _ in range(3)]) - assert txn1["type"] == "transaction" - assert txn1["transaction"] == "test_agent workflow" - assert txn2["type"] == "transaction" - assert txn2["transaction"] == "test_agent workflow" - assert txn3["type"] == "transaction" + assert len(events) == 3 + txn1, txn2, txn3 = events - assert txn3["transaction"] == "test_agent workflow" + assert txn1["type"] == "transaction" + assert txn1["transaction"] == "test_agent workflow" + assert txn2["type"] == "transaction" + assert txn2["transaction"] == "test_agent workflow" + assert txn3["type"] == "transaction" + assert txn3["transaction"] == "test_agent workflow" # Test input messages with mixed roles including "ai" @@ -3431,19 +3557,18 @@ def failing_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, - ) - - if stream_gen_ai_spans: + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) items = capture_items("span", "transaction") # Note: The agents library catches tool exceptions internally, @@ -3454,27 +3579,38 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - # Find the execute_tool span - execute_tool_span = None - for span in spans: - description = span.get("name", "") - if description is not None and description.startswith( - "execute_tool failing_tool" - ): - execute_tool_span = span - break + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("name", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break - # Verify the execute_tool span was created - assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["name"] == "execute_tool failing_tool" - assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["name"] == "execute_tool failing_tool" + assert execute_tool_span["attributes"]["gen_ai.tool.name"] == "failing_tool" - # Verify error status was set (this is the key test for our patch) - # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "error" - else: + # Verify error status was set (this is the key test for our 
patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "error" + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) events = capture_events() # Note: The agents library catches tool exceptions internally, @@ -3485,28 +3621,28 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - # Find the execute_tool span - execute_tool_span = None - for span in spans: - description = span.get("description", "") - if description is not None and description.startswith( - "execute_tool failing_tool" - ): - execute_tool_span = span - break + # Find the execute_tool span + execute_tool_span = None + for span in spans: + description = span.get("description", "") + if description is not None and description.startswith( + "execute_tool failing_tool" + ): + execute_tool_span = span + break - # Verify the execute_tool span was created - assert execute_tool_span is not None, "execute_tool span was not created" - assert execute_tool_span["description"] == "execute_tool failing_tool" - assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" + # Verify the execute_tool span was created + assert execute_tool_span is not None, "execute_tool span was not created" + assert execute_tool_span["description"] == "execute_tool failing_tool" + assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool" - # Verify error status was set (this is the key test for our patch) - # The span should be marked as error because the tool execution failed - assert execute_tool_span["status"] == "internal_error" - assert 
execute_tool_span["tags"]["status"] == "internal_error" + # Verify error status was set (this is the key test for our patch) + # The span should be marked as error because the tool execution failed + assert execute_tool_span["status"] == "internal_error" + assert execute_tool_span["tags"]["status"] == "internal_error" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3566,19 +3702,18 @@ async def test_invoke_agent_span_includes_usage_data( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - if stream_gen_ai_spans: + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) items = capture_items("span", "transaction") result = await agents.Runner.run( @@ -3587,30 +3722,38 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) - # Verify invoke_agent span has usage data from context_wrapper - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] + # 
Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["attributes"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["attributes"] - assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 - assert ( - invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - ) - assert ( - invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] - == 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 5 + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - else: events = capture_events() result = await agents.Runner.run( @@ -3619,25 +3762,23 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) - # Verify invoke_agent span has usage data from context_wrapper - assert 
invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] - assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] - - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 - assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 - assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 - assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ( - invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 - ) + # Verify invoke_agent span has usage data from context_wrapper + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.usage.input_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.output_tokens" in invoke_agent_span["data"] + assert "gen_ai.usage.total_tokens" in invoke_agent_span["data"] + + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens"] == 10 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["data"]["gen_ai.usage.total_tokens"] == 30 + assert invoke_agent_span["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3697,19 +3838,18 @@ async def test_ai_client_span_includes_response_model( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - if stream_gen_ai_spans: + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + 
traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) items = capture_items("span", "transaction") result = await agents.Runner.run( @@ -3718,21 +3858,30 @@ async def test_ai_client_span_includes_response_model( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify ai_client span has response model from API response - assert ai_client_span["name"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" + # Verify ai_client span has response model from API response + assert ai_client_span["name"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - else: events = capture_events() result = await agents.Runner.run( @@ -3741,18 +3890,14 @@ async def test_ai_client_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify ai_client span has response model from API response - assert 
ai_client_span["description"] == "chat gpt-4" - assert "gen_ai.response.model" in ai_client_span["data"] - assert ( - ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + # Verify ai_client span has response model from API response + assert ai_client_span["description"] == "chat gpt-4" + assert "gen_ai.response.model" in ai_client_span["data"] + assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -3817,18 +3962,18 @@ async def test_ai_client_span_response_model_with_chat_completions( serialize_pydantic=True, ) - with patch.object( - agent.model._client._client, - "send", - return_value=response, - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) + if stream_gen_ai_spans: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) - if stream_gen_ai_spans: items = capture_items("span", "transaction") result = await agents.Runner.run( @@ -3837,20 +3982,28 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4o-mini-2024-07-18" + # Verify response model from API response is 
captured + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4o-mini-2024-07-18" + ) + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - else: events = capture_events() result = await agents.Runner.run( @@ -3859,18 +4012,15 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None - (transaction,) = events - spans = transaction["spans"] - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify response model from API response is captured - assert "gen_ai.response.model" in ai_client_span["data"] - assert ( - ai_client_span["data"]["gen_ai.response.model"] - == "gpt-4o-mini-2024-07-18" - ) + # Verify response model from API response is captured + assert "gen_ai.response.model" in ai_client_span["data"] + assert ( + ai_client_span["data"]["gen_ai.response.model"] == "gpt-4o-mini-2024-07-18" + ) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4122,32 +4272,30 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + 
span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify invoke_agent span has response model from API - assert invoke_agent_span["name"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Verify invoke_agent span has response model from API + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) - # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["attributes"] - assert ( - ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["attributes"] + assert ( + ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) else: with patch.object( agent.model._client._client, @@ -4168,28 +4316,23 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify invoke_agent span has response model from API - assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert ( - invoke_agent_span["data"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Verify 
invoke_agent span has response model from API + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) - # Also verify ai_client span has it - assert "gen_ai.response.model" in ai_client_span["data"] - assert ( - ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" - ) + # Also verify ai_client span has it + assert "gen_ai.response.model" in ai_client_span["data"] + assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4311,28 +4454,27 @@ def calculator(a: int, b: int) -> int: assert result is not None - spans = [item.payload for item in items if item.type == "span"] + spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = spans[0] - first_ai_client_span = spans[1] - second_ai_client_span = spans[3] # After tool span + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span - # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["attributes"] - assert ( - invoke_agent_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) - # Each ai_client span has its own response model from the API - assert ( - first_ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4-0613" - ) - assert ( - second_ai_client_span["attributes"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Each ai_client span has its own response model from the API + assert ( + first_ai_client_span["attributes"]["gen_ai.response.model"] == 
"gpt-4-0613" + ) + assert ( + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) else: with patch.object( agent_with_tool.model._client._client, @@ -4355,26 +4497,25 @@ def calculator(a: int, b: int) -> int: assert result is not None - (transaction,) = events - spans = transaction["spans"] + (transaction,) = events + spans = transaction["spans"] - invoke_agent_span = spans[0] - first_ai_client_span = spans[1] - second_ai_client_span = spans[3] # After tool span + invoke_agent_span = spans[0] + first_ai_client_span = spans[1] + second_ai_client_span = spans[3] # After tool span - # Invoke_agent span uses the LAST response model - assert "gen_ai.response.model" in invoke_agent_span["data"] - assert ( - invoke_agent_span["data"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["data"] + assert ( + invoke_agent_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" + ) - # Each ai_client span has its own response model from the API - assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" - assert ( - second_ai_client_span["data"]["gen_ai.response.model"] - == "gpt-4.1-2025-04-14" - ) + # Each ai_client span has its own response model from the API + assert first_ai_client_span["data"]["gen_ai.response.model"] == "gpt-4-0613" + assert ( + second_ai_client_span["data"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) def test_openai_agents_message_truncation(sentry_init, capture_items): @@ -4597,28 +4738,25 @@ async def test_streaming_ttft_on_chat_span( agent_with_tool.model._client._client, "send", return_value=response, - ) as _: - with sentry_sdk.start_transaction( - name="test_ttft", sampled=True - ) as transaction: - result = agents.Runner.run_streamed( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + ) as _, 
sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction: + result = agents.Runner.run_streamed( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) - async for event in result.stream_events(): - pass + async for event in result.stream_events(): + pass - # Verify TTFT is recorded on the chat span (must be inside transaction context) - chat_spans = [ - s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat" - ] - assert len(chat_spans) >= 1 - chat_span = chat_spans[0] + # Verify TTFT is recorded on the chat span (must be inside transaction context) + chat_spans = [ + s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat" + ] + assert len(chat_spans) >= 1 + chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data - assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data + assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4671,39 +4809,31 @@ async def test_conversation_id_on_all_spans( assert result is not None - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify workflow span (transaction) has conversation_id - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + # Verify workflow span 
(transaction) has conversation_id + (transaction,) = (item.payload for item in items if item.type == "transaction") - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) - # Verify invoke_agent span has conversation_id - assert ( - invoke_agent_span["attributes"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + ) - # Verify ai_client span has conversation_id - assert ( - ai_client_span["attributes"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify ai_client span has conversation_id + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" else: with patch.object( agent.model._client._client, @@ -4726,28 +4856,24 @@ async def test_conversation_id_on_all_spans( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify workflow span (transaction) has conversation_id - assert ( - transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] - == "conv_test_123" - ) + # Verify workflow span (transaction) has conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) - # Verify invoke_agent span has conversation_id - assert ( - invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" - ) + # Verify invoke_agent span 
has conversation_id + assert invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" - # Verify ai_client span has conversation_id - assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + # Verify ai_client span has conversation_id + assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -4981,30 +5107,24 @@ async def test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = ( - item.payload for item in items if item.type == "transaction" - ) + (transaction,) = (item.payload for item in items if item.type == "transaction") - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - # Verify conversation_id is NOT set on any spans - assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "attributes", {} - ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get( - "attributes", {} - ) - assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "attributes", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) else: with patch.object( agent.model._client._client, @@ -5026,18 +5146,16 @@ async def 
test_no_conversation_id_when_not_provided( assert result is not None - (transaction,) = events - spans = transaction["spans"] - invoke_agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span = next( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT) - # Verify conversation_id is NOT set on any spans - assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( - "data", {} - ) - assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) - assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "data", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("data", {}) From 14e379ff0e373503bd2dd6883abcfb6f62a43374 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:12:57 +0200 Subject: [PATCH 53/84] fix pydantic-ai test --- tests/integrations/pydantic_ai/test_pydantic_ai.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index d60058e4ce..b2dfe76988 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -1366,9 +1366,6 @@ async def test_message_history( _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - # First message - await agent.run("Hello, I'm Alice") - # Second message with history from pydantic_ai import messages @@ -1385,6 +1382,9 @@ async def test_message_history( if 
stream_gen_ai_spans: items = capture_items("transaction", "span") + # First message + await agent.run("Hello, I'm Alice") + await agent.run("What is my name?", message_history=history) # We should have 2 transactions @@ -1407,6 +1407,9 @@ async def test_message_history( else: events = capture_events() + # First message + await agent.run("Hello, I'm Alice") + await agent.run("What is my name?", message_history=history) # We should have 2 transactions From 596db319bb01825003371d0331ce731b64905895 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:31:07 +0200 Subject: [PATCH 54/84] fix tracing tests --- tests/tracing/test_decorator.py | 514 +++++++++++++++++++++----------- tests/tracing/test_misc.py | 35 ++- 2 files changed, 363 insertions(+), 186 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index a71ca5588f..30c14b8ea6 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -121,12 +121,13 @@ async def _some_function_traced(a, b, c): ) -def test_span_templates_ai_dicts(sentry_init, capture_items): - sentry_init( - traces_sample_rate=1.0, - ) - items = capture_items("span") - +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_templates_ai_dicts( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): return { @@ -165,89 +166,157 @@ def my_agent(): presence_penalty=2.0, ) - with sentry_sdk.start_transaction(name="test-transaction"): - my_agent() + if stream_gen_ai_spans: + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + items = capture_items("span") - (agent_span, tool_span, chat_span) = ( - item.payload for item in items if item.type == "span" - ) + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent() - assert ( - agent_span["name"] - == "invoke_agent 
test_decorator.test_span_templates_ai_dicts..my_agent" - ) - assert agent_span["attributes"] == { - "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", - "gen_ai.operation.name": "invoke_agent", - "sentry.environment": "production", - "sentry.op": "gen_ai.invoke_agent", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert ( - tool_span["name"] - == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" - ) - assert tool_span["attributes"] == { - "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", - "gen_ai.operation.name": "execute_tool", - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "sentry.environment": "production", - "sentry.op": "gen_ai.execute_tool", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - assert "gen_ai.tool.description" not in tool_span["attributes"] - - assert chat_span["name"] == "chat my-gpt-4o-mini" - assert chat_span["attributes"] == { - "gen_ai.operation.name": "chat", - "gen_ai.request.frequency_penalty": 1.0, - "gen_ai.request.max_tokens": 100, - "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", - "gen_ai.request.model": "my-gpt-4o-mini", - "gen_ai.request.presence_penalty": 2.0, - "gen_ai.request.temperature": 0.5, - "gen_ai.request.top_k": 40, - "gen_ai.request.top_p": 0.9, - "gen_ai.response.model": "my-gpt-4o-mini-v123", - 
"gen_ai.usage.input_tokens": 11, - "gen_ai.usage.output_tokens": 22, - "gen_ai.usage.total_tokens": 33, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - -def test_span_templates_ai_objects(sentry_init, capture_items): - sentry_init( - traces_sample_rate=1.0, - ) - items = capture_items("span") + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) + + assert ( + agent_span["name"] + == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" + ) + assert agent_span["attributes"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", + "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert ( + tool_span["name"] + == "execute_tool test_decorator.test_span_templates_ai_dicts..my_tool" + ) + assert tool_span["attributes"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": 
mock.ANY, + } + assert "gen_ai.tool.description" not in tool_span["attributes"] + + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + events = capture_events() + + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent() + + (event,) = events + (agent_span, tool_span, chat_span) = event["spans"] + + assert agent_span["op"] == "gen_ai.invoke_agent" + assert ( + agent_span["description"] + == "invoke_agent test_decorator.test_span_templates_ai_dicts..my_agent" + ) + assert agent_span["data"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_dicts..my_agent", + "gen_ai.operation.name": "invoke_agent", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert tool_span["op"] == "gen_ai.execute_tool" + assert ( + tool_span["description"] + == "execute_tool 
test_decorator.test_span_templates_ai_dicts..my_tool" + ) + assert tool_span["data"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_dicts..my_tool", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert "gen_ai.tool.description" not in tool_span["data"] + + assert chat_span["op"] == "gen_ai.chat" + assert chat_span["description"] == "chat my-gpt-4o-mini" + assert chat_span["data"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +def test_span_templates_ai_objects( + sentry_init, + capture_events, + capture_items, + stream_gen_ai_spans, +): @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2): """This is a tool function.""" @@ -290,91 +359,155 @@ def my_agent(): presence_penalty=2.0, ) - with sentry_sdk.start_transaction(name="test-transaction"): - my_agent() + if stream_gen_ai_spans: + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + items = capture_items("span") - (agent_span, tool_span, chat_span) = ( - item.payload for item in items if item.type == "span" - ) + with 
sentry_sdk.start_transaction(name="test-transaction"): + my_agent() - assert ( - agent_span["name"] - == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" - ) - assert agent_span["attributes"] == { - "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", - "gen_ai.operation.name": "invoke_agent", - "sentry.environment": "production", - "sentry.op": "gen_ai.invoke_agent", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert ( - tool_span["name"] - == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" - ) - assert tool_span["attributes"] == { - "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", - "gen_ai.tool.description": "This is a tool function.", - "gen_ai.operation.name": "execute_tool", - "gen_ai.usage.input_tokens": 10, - "gen_ai.usage.output_tokens": 20, - "gen_ai.usage.total_tokens": 30, - "sentry.environment": "production", - "sentry.op": "gen_ai.execute_tool", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } - - assert chat_span["name"] == "chat my-gpt-4o-mini" - assert chat_span["attributes"] == { - "gen_ai.operation.name": "chat", - "gen_ai.request.frequency_penalty": 1.0, - "gen_ai.request.max_tokens": 100, - "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", - "gen_ai.request.model": "my-gpt-4o-mini", - "gen_ai.request.presence_penalty": 2.0, - "gen_ai.request.temperature": 0.5, - 
"gen_ai.request.top_k": 40, - "gen_ai.request.top_p": 0.9, - "gen_ai.response.model": "my-gpt-4o-mini-v123", - "gen_ai.usage.input_tokens": 11, - "gen_ai.usage.output_tokens": 22, - "gen_ai.usage.total_tokens": 33, - "sentry.environment": "production", - "sentry.op": "gen_ai.chat", - "sentry.origin": "manual", - "sentry.release": mock.ANY, - "sentry.sdk.name": "sentry.python", - "sentry.sdk.version": mock.ANY, - "sentry.segment.id": mock.ANY, - "sentry.segment.name": "test-transaction", - "thread.id": mock.ANY, - "thread.name": mock.ANY, - } + (agent_span, tool_span, chat_span) = ( + item.payload for item in items if item.type == "span" + ) + assert ( + agent_span["name"] + == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" + ) + assert agent_span["attributes"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", + "gen_ai.operation.name": "invoke_agent", + "sentry.environment": "production", + "sentry.op": "gen_ai.invoke_agent", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } -@pytest.mark.parametrize("send_default_pii", [True, False]) -def test_span_templates_ai_pii(sentry_init, capture_items, send_default_pii): - sentry_init( - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - items = capture_items("span") + assert ( + tool_span["name"] + == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" + ) + assert tool_span["attributes"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", + "gen_ai.tool.description": "This is a tool function.", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + 
"sentry.op": "gen_ai.execute_tool", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert chat_span["name"] == "chat my-gpt-4o-mini" + assert chat_span["attributes"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "sentry.environment": "production", + "sentry.op": "gen_ai.chat", + "sentry.origin": "manual", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "test-transaction", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + else: + events = capture_events() + + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent() + + (event,) = events + (agent_span, tool_span, chat_span) = event["spans"] + + assert agent_span["op"] == "gen_ai.invoke_agent" + assert ( + agent_span["description"] + == "invoke_agent test_decorator.test_span_templates_ai_objects..my_agent" + ) + assert agent_span["data"] == { + "gen_ai.agent.name": "test_decorator.test_span_templates_ai_objects..my_agent", + "gen_ai.operation.name": "invoke_agent", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert tool_span["op"] == 
"gen_ai.execute_tool" + assert ( + tool_span["description"] + == "execute_tool test_decorator.test_span_templates_ai_objects..my_tool" + ) + assert tool_span["data"] == { + "gen_ai.tool.name": "test_decorator.test_span_templates_ai_objects..my_tool", + "gen_ai.tool.description": "This is a tool function.", + "gen_ai.operation.name": "execute_tool", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 20, + "gen_ai.usage.total_tokens": 30, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert chat_span["op"] == "gen_ai.chat" + assert chat_span["description"] == "chat my-gpt-4o-mini" + assert chat_span["data"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.frequency_penalty": 1.0, + "gen_ai.request.max_tokens": 100, + "gen_ai.request.messages": "[{'role': 'user', 'content': 'What is the weather in Tokyo?'}, {'role': 'system', 'content': 'You are a helpful assistant that can answer questions about the weather.'}]", + "gen_ai.request.model": "my-gpt-4o-mini", + "gen_ai.request.presence_penalty": 2.0, + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 40, + "gen_ai.request.top_p": 0.9, + "gen_ai.response.model": "my-gpt-4o-mini-v123", + "gen_ai.usage.input_tokens": 11, + "gen_ai.usage.output_tokens": 22, + "gen_ai.usage.total_tokens": 33, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_span_templates_ai_pii( + sentry_init, + capture_events, + capture_items, + send_default_pii, + stream_gen_ai_spans, +): @sentry_sdk.trace(template=SPANTEMPLATE.AI_TOOL) def my_tool(arg1, arg2, **kwargs): """This is a tool function.""" @@ -400,17 +533,44 @@ def my_agent(*args, **kwargs): ) return "agent_output" - with sentry_sdk.start_transaction(name="test-transaction"): - my_agent(22, 33, arg1=44, arg2=55) + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + + if stream_gen_ai_spans: + items = capture_items("span") - (_, tool_span, _) = (item.payload for item in items if item.type == "span") + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent(22, 33, arg1=44, arg2=55) - if send_default_pii: - assert ( - tool_span["attributes"]["gen_ai.tool.input"] - == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" - ) - assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" + (_, tool_span, _) = (item.payload for item in items if item.type == "span") + + if send_default_pii: + assert ( + tool_span["attributes"]["gen_ai.tool.input"] + == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" + ) + assert tool_span["attributes"]["gen_ai.tool.output"] == "'tool_output'" + else: + assert "gen_ai.tool.input" not in tool_span["attributes"] + assert "gen_ai.tool.output" not in tool_span["attributes"] else: - assert "gen_ai.tool.input" not in tool_span["attributes"] - assert "gen_ai.tool.output" not in tool_span["attributes"] + events = capture_events() + + with sentry_sdk.start_transaction(name="test-transaction"): + my_agent(22, 33, arg1=44, arg2=55) + + (event,) = events + (_, tool_span, _) = event["spans"] + + if send_default_pii: + assert ( + tool_span["data"]["gen_ai.tool.input"] + == "{'args': (1, 2), 'kwargs': {'tool_arg1': '3', 'tool_arg2': '4'}}" + ) + assert tool_span["data"]["gen_ai.tool.output"] == "'tool_output'" + else: + assert "gen_ai.tool.input" not in tool_span["data"] + assert "gen_ai.tool.output" not in tool_span["data"] diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 85dfd6a302..1119f42461 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -646,24 +646,41 @@ def test_conversation_id_propagates_to_span_with_ai_op( span_data = event["spans"][0]["data"] assert span_data.get("gen_ai.conversation.id") == "conv-ai-op-test" + 
@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_conversation_id_propagates_to_span_with_gen_ai_op( - self, sentry_init, capture_items + self, sentry_init, capture_events, capture_items, stream_gen_ai_spans ): """Span with gen_ai.* op should get conversation_id.""" sentry_init( traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - items = capture_items("span") - scope = sentry_sdk.get_current_scope() - scope.set_conversation_id("conv-gen-ai-op-test") + if stream_gen_ai_spans: + items = capture_items("span") - with sentry_sdk.start_transaction(name="test-tx"): - with start_span(op="gen_ai.invoke_agent"): - pass + scope = sentry_sdk.get_current_scope() + scope.set_conversation_id("conv-gen-ai-op-test") + + with sentry_sdk.start_transaction(name="test-tx"): + with start_span(op="gen_ai.invoke_agent"): + pass + + spans = [item.payload for item in items if item.type == "span"] + span_data = spans[0]["attributes"] + else: + events = capture_events() + + scope = sentry_sdk.get_current_scope() + scope.set_conversation_id("conv-gen-ai-op-test") + + with sentry_sdk.start_transaction(name="test-tx"): + with start_span(op="gen_ai.invoke_agent"): + pass + + (event,) = events + span_data = event["spans"][0]["data"] - spans = [item.payload for item in items if item.type == "span"] - span_data = spans[0]["attributes"] assert span_data.get("gen_ai.conversation.id") == "conv-gen-ai-op-test" def test_conversation_id_not_propagated_to_non_ai_span( From a2adf96fd93c40536f91266447b13fa477437dd2 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:36:25 +0200 Subject: [PATCH 55/84] fix tests --- tests/tracing/test_decorator.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 30c14b8ea6..8d7c97fdbf 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -359,11 +359,12 @@ def 
my_agent(): presence_penalty=2.0, ) + sentry_init( + traces_sample_rate=1.0, + _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + ) + if stream_gen_ai_spans: - sentry_init( - traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) items = capture_items("span") with sentry_sdk.start_transaction(name="test-transaction"): From 401109aeb1f2b9d12431d7becabab4341b91f9b8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:55:30 +0200 Subject: [PATCH 56/84] feat: Remove truncation when stream_gen_ai_spans is enabled --- sentry_sdk/ai/utils.py | 8 + .../integrations/anthropic/test_anthropic.py | 329 +++------- .../google_genai/test_google_genai.py | 166 ++--- .../integrations/langchain/test_langchain.py | 137 ++--- .../integrations/langgraph/test_langgraph.py | 251 ++------ tests/integrations/litellm/test_litellm.py | 104 +--- tests/integrations/openai/test_openai.py | 60 +- .../openai_agents/test_openai_agents.py | 571 ++++++------------ 8 files changed, 439 insertions(+), 1187 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 8efa077ce5..4bd65ced76 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -741,6 +741,10 @@ def truncate_and_annotate_messages( scope: "Any", max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": + client = sentry_sdk.get_client() + if client.options["_experiments"].get("stream_gen_ai_spans", False): + return messages + if not messages: return None @@ -761,6 +765,10 @@ def truncate_and_annotate_embedding_inputs( scope: "Any", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": + client = sentry_sdk.get_client() + if client.options["_experiments"].get("stream_gen_ai_spans", False): + return messages + if not messages: return None diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 
2e240b9c8f..02de047711 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3625,20 +3625,14 @@ def mock_messages_create(*args, **kwargs): assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_anthropic_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_anthropic_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3654,83 +3648,42 @@ def test_anthropic_message_truncation( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == 
"transaction") - else: - events = capture_events() - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_anthropic_message_truncation_async( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): 
+async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -3746,70 +3699,30 @@ async def test_anthropic_message_truncation_async( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - assert len(events) > 0 - tx = 
events[0] - assert tx["type"] == "transaction" - - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -5203,21 +5116,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_image( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_image(sentry_init, capture_events): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5238,31 +5144,15 @@ def test_message_with_base64_image( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -5412,21 +5302,14 @@ def test_message_with_file_image( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_pdf( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_pdf(sentry_init, capture_events): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5447,30 +5330,14 @@ def test_message_with_base64_pdf( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -5615,21 +5482,14 @@ def test_message_with_file_document( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_mixed_content( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_mixed_content(sentry_init, capture_events): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ 
-5666,30 +5526,14 @@ def test_message_with_mixed_content( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -5721,21 +5565,14 @@ def test_message_with_mixed_content( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_multiple_images_different_formats( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_multiple_images_different_formats(sentry_init, capture_events): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5771,30 +5608,14 @@ def test_message_with_multiple_images_different_formats( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with 
start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 8da5e7ca22..d9b2736584 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1401,21 +1401,16 @@ def test_tool_calls_extraction( assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_google_genai_message_truncation( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -1424,39 +1419,21 @@ def test_google_genai_message_truncation( mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents=[large_content, small_content], - config=create_test_config(), - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2543,21 +2520,16 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, - capture_events, - capture_items, - 
mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2583,36 +2555,18 @@ def test_generate_content_with_function_response( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -2621,21 +2575,16 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" 
-@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2652,36 +2601,18 @@ def test_generate_content_with_mixed_string_and_content( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert 
messages[0]["role"] == "user" @@ -2744,13 +2675,8 @@ def test_generate_content_with_part_object_directly( assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """ Test generate_content with list of dict format inputs. @@ -2763,8 +2689,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2775,36 +2701,18 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - 
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7adb2d13c5..414eb67b3e 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2949,13 +2949,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langchain_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langchain_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -2963,8 +2957,8 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -2982,101 +2976,48 @@ def test_langchain_message_truncation( "small message 5", ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - 
"total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" - ] + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - assert len(llm_spans) > 0 + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + 
callback.on_llm_end(response=response, run_id=run_id) - llm_span = llm_spans[0] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + llm_span = llm_spans[0] + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 991c1f2269..c1e753716d 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -242,7 +242,6 @@ def original_compile(self, *args, **kwargs): assert "calculator" in tools_data -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -252,21 +251,14 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - stream_gen_ai_spans, -): +def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() test_state = { "messages": [ @@ -297,134 +289,57 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ] + assert result is not None - assert len(invoke_spans) == 1 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) == 1 - assert invoke_span["name"] == "invoke_agent test_graph" - assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" - assert ( - invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - ) - assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + invoke_span = invoke_spans[0] + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] + if 
send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - request_messages = invoke_span["attributes"][ - SPANDATA.GEN_AI_REQUEST_MESSAGES - ] + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - if isinstance(request_messages, str): - import json + if isinstance(request_messages, str): + import json - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" - response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response - - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] - tool_calls_data = invoke_span["attributes"][ - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS - ] + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response - if isinstance(tool_calls_data, str): - import json + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( - "attributes", {} - ) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "attributes", {} - ) + assert len(tool_calls_data) == 1 + assert 
tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) == 1 - - invoke_span = invoke_spans[0] - - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] - - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - if isinstance(request_messages, str): - import json - - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" 
- - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response - - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - - if isinstance(tool_calls_data, str): - import json - - tool_calls_data = json.loads(tool_calls_data) - - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" - else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( - "data", {} - ) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @@ -2022,13 +1937,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langgraph_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langgraph_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -2036,8 +1945,8 @@ def test_langgraph_message_truncation( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2057,66 +1966,28 @@ def test_langgraph_message_truncation( def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + assert result is not None + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) > 0 - 
messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8ae8dca99e..404cdeb9c4 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2341,20 +2341,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2368,79 +2362,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) 
> 0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0da39e842d..56ac885619 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -5720,21 +5720,16 @@ def test_openai_message_role_mapping( assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_openai_message_truncation( - sentry_init, - capture_events, - capture_items, - nonstreaming_chat_completions_model_response, - stream_gen_ai_spans, + sentry_init, capture_events, nonstreaming_chat_completions_model_response ): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, ) + events = capture_events() client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -5761,47 +5756,22 @@ def test_openai_message_truncation( {"role": "user", "content": large_content}, ] - if stream_gen_ai_spans: - items = 
capture_items("transaction", "span") - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - - messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) - - (event,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", - messages=large_messages, - ) - - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - assert isinstance(messages_data, str) + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) <= len(large_messages) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 60f88cd7f4..f15bac5c64 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -1679,16 +1679,13 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, capture_events, - capture_items, test_agent, get_model_response, responses_tool_call_model_responses, - stream_gen_ai_spans, ): """ Test tool execution span creation. @@ -1746,413 +1743,195 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - items = capture_items("transaction", "span") - - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) - - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - spans = [item.payload for item in items if item.type == "span"] - agent_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT - ) - tool_span = next( - span - for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, ) - available_tool = { - "name": 
"simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) - - assert agent_span["name"] == "invoke_agent test_agent" - assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["attributes"]["gen_ai.system"] == "openai" - - assert ai_client_span1["name"] == "chat gpt-4" - assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - 
ai_client_span1["attributes"]["gen_ai.request.available_tools"] - )[0] + events = capture_events() - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) - - assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 - assert ( - ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 - ) - assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads( - ai_client_span1["attributes"]["gen_ai.response.tool_calls"] - ) == [tool_call] - - assert tool_span["name"] == "execute_tool simple_test_tool" - assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" - 
assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["attributes"]["gen_ai.system"] == "openai" - assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" - assert ( - tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, ) - assert ai_client_span2["name"] == "chat gpt-4" - assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - ai_client_span2_available_tool = json.loads( - ai_client_span2["attributes"]["gen_ai.request.available_tools"] - )[0] - - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + (transaction,) = events + spans = transaction["spans"] + agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} ) - assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 - assert 
ai_client_span2["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["attributes"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 - assert ( - ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } ) - assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - else: - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, - ) - - events = capture_events() - - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, - ) + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent 
test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": 
"text", "text": "Please use the simple test tool"} + ], }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} - ) - - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } - ) - - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) - - assert 
ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert 
json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call + ] - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + 
assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], - }, - ] - ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["data"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert 
ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio From 5e8c254da212e907d24571911fe54dc9555b074d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 15:59:06 +0200 Subject: [PATCH 57/84] add pytest mark asyncio --- tests/integrations/anthropic/test_anthropic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 02de047711..21e6c95100 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3676,6 +3676,7 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 +@pytest.mark.asyncio async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( From 449457b9239b9b15606af9389ca5069d2e9412f2 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 
16:17:39 +0200 Subject: [PATCH 58/84] do not leak new option and use event_opt --- sentry_sdk/client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 01b4fc8fb6..aa3a210596 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1121,6 +1121,8 @@ def capture_event( envelope.add_profile(profile.to_json(event_opt, self.options)) span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False) + if "_has_gen_ai_span" in event_opt: + del event_opt["_has_gen_ai_span"] if is_transaction and not span_recorder_has_gen_ai_span: envelope.add_transaction(event_opt) @@ -1135,7 +1137,7 @@ def capture_event( envelope.add_transaction(event_opt) converted_gen_ai_spans = [ - _serialized_v1_span_to_serialized_v2_span(span, event) + _serialized_v1_span_to_serialized_v2_span(span, event_opt) for span in gen_ai_spans if isinstance(span, dict) ] From 96f86e35d921014fcac81fefb3eaf9c15128137d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:19:36 +0200 Subject: [PATCH 59/84] send version field in json --- sentry_sdk/client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index aa3a210596..e1da658efb 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1150,7 +1150,10 @@ def capture_event( "item_count": len(converted_gen_ai_spans), }, payload=PayloadRef( - json={"items": converted_gen_ai_spans}, + json={ + "version": 2, + "items": converted_gen_ai_spans, + }, ), ) ) From aba2cf12b0df5c5c3e39267365dda4efd7c0f556 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:31:45 +0200 Subject: [PATCH 60/84] fix op fallback --- sentry_sdk/client.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e1da658efb..053c73085c 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -174,11 +174,12 @@ 
def _serialized_v1_span_to_serialized_v2_span( res["span_id"] = span["span_id"] if "description" in span: - res["name"] = span["description"] - elif ( - "op" in span - ): # fallback based on observed downstream fallback for transactions - res["name"] = span["op"] + description = span["description"] + + if description is None and "op" in span: + res["name"] = span["op"] + + res["name"] = description if "start_timestamp" in span: start_timestamp = None From a48d7013654ecb015788e237e940cfd6aa1112c8 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:35:27 +0200 Subject: [PATCH 61/84] fix logic --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 053c73085c..ca5cad4da4 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -177,7 +177,7 @@ def _serialized_v1_span_to_serialized_v2_span( description = span["description"] if description is None and "op" in span: - res["name"] = span["op"] + description = span["op"] res["name"] = description From dcce855e4f19e77174c97045f2976901bb682c00 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 16:44:56 +0200 Subject: [PATCH 62/84] simplify logic --- sentry_sdk/client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index ca5cad4da4..d198b7f854 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1083,6 +1083,8 @@ def capture_event( event_id = event.get("event_id") if event_id is None: event["event_id"] = event_id = uuid.uuid4().hex + + span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False) event_opt = self._prepare_event(event, hint, scope) if event_opt is None: return None @@ -1121,10 +1123,6 @@ def capture_event( if is_transaction and isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - span_recorder_has_gen_ai_span = 
event.pop("_has_gen_ai_span", False) - if "_has_gen_ai_span" in event_opt: - del event_opt["_has_gen_ai_span"] - if is_transaction and not span_recorder_has_gen_ai_span: envelope.add_transaction(event_opt) elif is_transaction: From 43920b54de1a373d02d83ba3f5fac382e54c4cab Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 19:56:04 +0200 Subject: [PATCH 63/84] promote to top level option --- .../integrations/anthropic/test_anthropic.py | 104 +++++++++--------- .../google_genai/test_google_genai.py | 72 ++++++------ .../huggingface_hub/test_huggingface_hub.py | 16 +-- .../integrations/langchain/test_langchain.py | 54 ++++----- .../integrations/langgraph/test_langgraph.py | 36 +++--- tests/integrations/litellm/test_litellm.py | 54 ++++----- tests/integrations/openai/test_openai.py | 86 +++++++-------- .../openai_agents/test_openai_agents.py | 104 +++++++++--------- .../pydantic_ai/test_pydantic_ai.py | 80 +++++++------- 9 files changed, 303 insertions(+), 303 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 2e240b9c8f..f9f6241997 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -103,7 +103,7 @@ def test_nonstreaming_create_message( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -238,7 +238,7 @@ async def test_nonstreaming_create_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -409,7 +409,7 @@ def test_streaming_create_message( 
integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -574,7 +574,7 @@ def test_streaming_create_message_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -726,7 +726,7 @@ def test_streaming_create_message_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -891,7 +891,7 @@ def test_stream_messages( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1055,7 +1055,7 @@ def test_stream_messages_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1215,7 +1215,7 @@ def test_stream_messages_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1386,7 +1386,7 @@ async def test_streaming_create_message_async( traces_sample_rate=1.0, default_integrations=False, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1555,7 +1555,7 @@ async def 
test_streaming_create_message_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1709,7 +1709,7 @@ async def test_streaming_create_message_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -1879,7 +1879,7 @@ async def test_stream_message_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2044,7 +2044,7 @@ async def test_stream_messages_async_api_error( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2205,7 +2205,7 @@ async def test_stream_messages_async_close( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2417,7 +2417,7 @@ def test_streaming_create_message_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2619,7 +2619,7 @@ def test_stream_messages_with_input_json_delta( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2827,7 +2827,7 @@ async def test_streaming_create_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -3037,7 +3037,7 @@ async def test_stream_message_with_input_json_delta_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -3154,7 +3154,7 @@ def test_exception_message_create( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3202,7 +3202,7 @@ def test_span_status_error( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "span") @@ -3264,7 +3264,7 @@ async def test_span_status_error_async( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "span") @@ -3326,7 +3326,7 @@ async def test_exception_message_create_async( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -3373,7 +3373,7 @@ def test_span_origin( 
sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3423,7 +3423,7 @@ async def test_span_origin_async( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -3561,7 +3561,7 @@ def test_anthropic_message_role_mapping( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3637,7 +3637,7 @@ def test_anthropic_message_truncation( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3729,7 +3729,7 @@ async def test_anthropic_message_truncation_async( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -3837,7 +3837,7 @@ def test_nonstreaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -3994,7 +3994,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncAnthropic(api_key="z") @@ -4191,7 +4191,7 @@ def test_streaming_create_message_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4388,7 +4388,7 @@ def test_stream_messages_with_system_prompt( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4578,7 +4578,7 @@ async def test_stream_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4773,7 +4773,7 @@ async def test_streaming_create_message_with_system_prompt_async( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -4917,7 +4917,7 @@ def test_system_prompt_with_complex_structure( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5215,7 +5215,7 @@ def test_message_with_base64_image( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5289,7 +5289,7 @@ def test_message_with_url_image( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5356,7 +5356,7 @@ def test_message_with_file_image( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5424,7 +5424,7 @@ def test_message_with_base64_pdf( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5492,7 +5492,7 @@ def test_message_with_url_pdf( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5559,7 +5559,7 @@ def test_message_with_file_document( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5627,7 +5627,7 @@ def test_message_with_mixed_content( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5733,7 +5733,7 @@ def test_message_with_multiple_images_different_formats( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5831,7 +5831,7 @@ def test_binary_content_not_stored_when_pii_disabled( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5891,7 +5891,7 @@ def test_binary_content_not_stored_when_prompts_disabled( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -5950,7 +5950,7 @@ def test_cache_tokens_nonstreaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6028,7 +6028,7 @@ def test_input_tokens_include_cache_write_nonstreaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6110,7 +6110,7 @@ def test_input_tokens_include_cache_read_nonstreaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6216,7 +6216,7 @@ def test_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, 
) if stream_gen_ai_spans: @@ -6315,7 +6315,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -6382,7 +6382,7 @@ def test_input_tokens_unchanged_without_caching( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = Anthropic(api_key="z") @@ -6474,7 +6474,7 @@ def test_cache_tokens_streaming( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -6568,7 +6568,7 @@ def test_stream_messages_cache_tokens( sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 8da5e7ca22..94bfea91fd 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -137,7 +137,7 @@ def test_nonstreaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock the HTTP response at the _api_client.request() level @@ -281,7 +281,7 @@ def test_generate_content_with_system_instruction( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -364,7 +364,7 @@ def test_generate_content_with_tools( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create a mock tool function @@ -482,7 +482,7 @@ def test_tool_execution( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create a mock tool function @@ -548,7 +548,7 @@ def test_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "transaction") @@ -604,7 +604,7 @@ def test_streaming_generate_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create streaming chunks - simulating a multi-chunk response @@ -797,7 +797,7 @@ def test_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -849,7 +849,7 @@ def test_response_without_usage_metadata( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response without usage metadata @@ -917,7 +917,7 @@ def test_multiple_candidates( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response with multiple candidates @@ -1021,7 +1021,7 @@ def test_all_configuration_parameters( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1103,7 +1103,7 @@ def test_empty_response( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Minimal response with empty candidates array @@ -1158,7 +1158,7 @@ def test_response_with_different_id_fields( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response with response_id and model_version @@ -1248,7 +1248,7 @@ def test_contents_as_none( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1299,7 +1299,7 @@ def test_tool_calls_extraction( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response with function calls @@ -1414,7 +1414,7 @@ def test_google_genai_message_truncation( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) large_content = ( @@ -1515,7 +1515,7 @@ def test_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock the HTTP response at the _api_client.request() level @@ -1637,7 +1637,7 @@ def test_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock response with single embedding @@ -1717,7 +1717,7 @@ def test_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("transaction", "event") @@ -1774,7 +1774,7 @@ def test_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response without statistics (typical for older google-genai versions) @@ -1839,7 +1839,7 @@ def test_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1901,7 +1901,7 @@ async def test_async_embed_content( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock the async HTTP response @@ 
-2024,7 +2024,7 @@ async def test_async_embed_content_string_input( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock response with single embedding @@ -2109,7 +2109,7 @@ async def test_async_embed_content_error_handling( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2168,7 +2168,7 @@ async def test_async_embed_content_without_statistics( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Response without statistics (typical for older google-genai versions) @@ -2238,7 +2238,7 @@ async def test_async_embed_content_span_origin( sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -2296,7 +2296,7 @@ def test_generate_content_with_content_object( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2356,7 +2356,7 @@ def test_generate_content_with_dict_format( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2414,7 +2414,7 @@ 
def test_generate_content_with_file_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2487,7 +2487,7 @@ def test_generate_content_with_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2556,7 +2556,7 @@ def test_generate_content_with_function_response( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2634,7 +2634,7 @@ def test_generate_content_with_mixed_string_and_content( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2701,7 +2701,7 @@ def test_generate_content_with_part_object_directly( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2763,7 +2763,7 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2823,7 +2823,7 @@ def test_generate_content_with_dict_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2891,7 +2891,7 @@ def test_generate_content_without_parts_property_inline_data( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2957,7 +2957,7 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 2d94082e7b..dc9d7925ff 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -483,7 +483,7 @@ def test_text_generation( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = InferenceClient(model="test-model") @@ -618,7 +618,7 @@ def test_text_generation_streaming( traces_sample_rate=1.0, 
send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = InferenceClient(model="test-model") @@ -751,7 +751,7 @@ def test_chat_completion( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -888,7 +888,7 @@ def test_chat_completion_streaming( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -1024,7 +1024,7 @@ def test_chat_completion_api_error( ): sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -1140,7 +1140,7 @@ def test_span_status_error( sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1210,7 +1210,7 @@ def test_chat_completion_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() @@ -1366,7 +1366,7 @@ def test_chat_completion_streaming_with_tools( traces_sample_rate=1.0, send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - _experiments={"stream_gen_ai_spans": 
stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = get_hf_provider_inference_client() diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7adb2d13c5..e8d96d7eb1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -273,7 +273,7 @@ def test_langchain_text_completion( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) model_response = get_model_response( @@ -395,7 +395,7 @@ def test_langchain_chat_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) request_headers = {} @@ -484,7 +484,7 @@ def test_langchain_tool_call_with_run_name( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -566,7 +566,7 @@ def test_langchain_create_agent( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) model_response = get_model_response( @@ -766,7 +766,7 @@ def test_tool_execution_span( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) responses = responses_tool_call_model_responses( @@ -1056,7 +1056,7 @@ def test_langchain_openai_tools_agent_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -1328,7 +1328,7 @@ def test_langchain_openai_tools_agent( ], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -1605,7 +1605,7 @@ def test_langchain_openai_tools_agent_with_config( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -1726,7 +1726,7 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2000,7 +2000,7 @@ def test_langchain_openai_tools_agent_stream( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2293,7 +2293,7 @@ def test_langchain_openai_tools_agent_stream_with_config( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2400,7 +2400,7 @@ def test_langchain_error( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2454,7 +2454,7 @@ def test_span_status_error( sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("event", "transaction", "span") @@ -2761,7 +2761,7 @@ def test_langchain_message_role_mapping( 
integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) prompt = ChatPromptTemplate.from_messages( @@ -2963,7 +2963,7 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -3114,7 +3114,7 @@ def test_langchain_embeddings_sync( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3262,7 +3262,7 @@ def test_langchain_embeddings_embed_query( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3403,7 +3403,7 @@ async def test_langchain_embeddings_async( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) async def mock_aembed_documents(self, texts): @@ -3554,7 +3554,7 @@ async def test_langchain_embeddings_aembed_query( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) async def mock_aembed_query(self, text): @@ -3671,7 +3671,7 
@@ def test_langchain_embeddings_no_model_name( sentry_init( integrations=[LangchainIntegration(include_prompts=False)], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3772,7 +3772,7 @@ def test_langchain_embeddings_integration_disabled( sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Initialize without LangchainIntegration @@ -3844,7 +3844,7 @@ def test_langchain_embeddings_multiple_providers( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -3999,7 +3999,7 @@ def test_langchain_embeddings_multiple_calls( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -4125,7 +4125,7 @@ def test_langchain_embeddings_span_hierarchy( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("transaction", "span") @@ -4235,7 +4235,7 @@ def test_langchain_embeddings_with_list_and_string_inputs( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: items = capture_items("span") @@ -4359,7 +4359,7 @@ def test_langchain_response_model_extraction( 
integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -4718,7 +4718,7 @@ def test_langchain_ai_system_detection( sentry_init( integrations=[LangchainIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 991c1f2269..6dd5c3cace 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -160,7 +160,7 @@ def test_state_graph_compile( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) graph = MockStateGraph() @@ -265,7 +265,7 @@ def test_pregel_invoke( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -450,7 +450,7 @@ def test_pregel_ainvoke( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = {"messages": [MockMessage("What's the weather like?", name="user")]} @@ -598,7 +598,7 @@ def test_pregel_invoke_error( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = {"messages": [MockMessage("This will fail")]} @@ -660,7 +660,7 @@ def test_pregel_ainvoke_error( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = {"messages": [MockMessage("This will fail async")]} @@ -720,7 +720,7 @@ def test_span_origin( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) graph = MockStateGraph() @@ -773,7 +773,7 @@ def test_pregel_invoke_with_different_graph_names( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) pregel = MockPregelInstance(graph_name) if graph_name else MockPregelInstance() @@ -854,7 +854,7 @@ def test_pregel_invoke_span_includes_usage_data( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -970,7 +970,7 @@ def test_pregel_ainvoke_span_includes_usage_data( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1086,7 +1086,7 @@ def test_pregel_invoke_multiple_llm_calls_aggregate_usage( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1201,7 +1201,7 @@ def test_pregel_ainvoke_multiple_llm_calls_aggregate_usage( sentry_init( 
integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1316,7 +1316,7 @@ def test_pregel_invoke_span_includes_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1425,7 +1425,7 @@ def test_pregel_ainvoke_span_includes_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1534,7 +1534,7 @@ def test_pregel_invoke_span_uses_last_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1654,7 +1654,7 @@ def test_pregel_ainvoke_span_uses_last_response_model( sentry_init( integrations=[LanggraphIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_state = { @@ -1819,7 +1819,7 @@ def test_extraction_functions_complex_scenario( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) pregel = MockPregelInstance("complex_graph") @@ -1930,7 +1930,7 @@ def test_langgraph_message_role_mapping( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Mock a langgraph message with mixed roles @@ -2036,7 +2036,7 @@ def test_langgraph_message_truncation( 
integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) large_content = ( diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 8ae8dca99e..a0120cd7b5 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -156,7 +156,7 @@ def test_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -299,7 +299,7 @@ async def test_async_nonstreaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -445,7 +445,7 @@ def test_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -553,7 +553,7 @@ async def test_async_streaming_chat_completion( integrations=[LiteLLMIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -658,7 +658,7 @@ def test_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="test-key") @@ -777,7 +777,7 @@ async def test_async_embeddings_create( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="test-key") @@ -892,7 +892,7 @@ def test_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="test-key") @@ -1003,7 +1003,7 @@ async def test_async_embeddings_create_with_list_input( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="test-key") @@ -1115,7 +1115,7 @@ def test_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="test-key") @@ -1213,7 +1213,7 @@ async def test_async_embeddings_no_pii( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="test-key") @@ -1308,7 +1308,7 @@ def test_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = 
[{"role": "user", "content": "Hello!"}] @@ -1374,7 +1374,7 @@ async def test_async_exception_handling( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1441,7 +1441,7 @@ def test_span_origin( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1523,7 +1523,7 @@ def test_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1698,7 +1698,7 @@ async def test_async_multiple_providers( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1877,7 +1877,7 @@ def test_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -1986,7 +1986,7 @@ async def test_async_additional_parameters( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2096,7 +2096,7 @@ def test_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2181,7 +2181,7 @@ async def test_async_no_integration( """Test that when integration is not enabled, callbacks don't break.""" sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2266,7 +2266,7 @@ def test_response_without_usage( sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [{"role": "user", "content": "Hello!"}] @@ -2353,7 +2353,7 @@ def test_litellm_message_truncation( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) large_content = ( @@ -2463,7 +2463,7 @@ def test_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2583,7 +2583,7 @@ async def test_async_binary_content_encoding_image_url( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2705,7 +2705,7 @@ def test_binary_content_encoding_mixed_content( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2814,7 +2814,7 @@ async def test_async_binary_content_encoding_mixed_content( 
integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -2927,7 +2927,7 @@ def test_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ @@ -3044,7 +3044,7 @@ async def test_async_binary_content_encoding_uri_type( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) messages = [ diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0da39e842d..af0932eeb9 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -125,7 +125,7 @@ def test_nonstreaming_chat_completion_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -288,7 +288,7 @@ def test_nonstreaming_chat_completion( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -453,7 +453,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = 
AsyncOpenAI(api_key="z") @@ -611,7 +611,7 @@ async def test_nonstreaming_chat_completion_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -785,7 +785,7 @@ def test_streaming_chat_completion_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -964,7 +964,7 @@ def test_streaming_chat_completion_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -1073,7 +1073,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -1165,7 +1165,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -1259,7 +1259,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( integrations=[OpenAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -1416,7 +1416,7 @@ def test_streaming_chat_completion( ], 
traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -1656,7 +1656,7 @@ async def test_streaming_chat_completion_async_no_prompts( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -1889,7 +1889,7 @@ async def test_streaming_chat_completion_async( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2131,7 +2131,7 @@ def test_bad_chat_completion( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2176,7 +2176,7 @@ def test_span_status_error( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2229,7 +2229,7 @@ async def test_bad_chat_completion_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2279,7 +2279,7 @@ def test_embeddings_create_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2397,7 +2397,7 @@ def test_embeddings_create( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2539,7 +2539,7 @@ async def test_embeddings_create_async_no_pii( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2658,7 +2658,7 @@ async def test_embeddings_create_async( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2797,7 +2797,7 @@ def test_embeddings_create_raises_error( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2842,7 +2842,7 @@ async def test_embeddings_create_raises_error_async( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -2884,7 +2884,7 @@ def test_span_origin_nonstreaming_chat( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -2941,7 +2941,7 @@ async def test_span_origin_nonstreaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ 
-2996,7 +2996,7 @@ def test_span_origin_streaming_chat( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -3082,7 +3082,7 @@ async def test_span_origin_streaming_chat_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -3174,7 +3174,7 @@ def test_span_origin_embeddings( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -3225,7 +3225,7 @@ async def test_span_origin_embeddings_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -3642,7 +3642,7 @@ def test_ai_client_span_responses_api_no_pii( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -3814,7 +3814,7 @@ def test_ai_client_span_responses_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -4237,7 +4237,7 @@ def test_responses_api_conversation_id( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -4290,7 +4290,7 @@ def test_error_in_responses_api( 
integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -4425,7 +4425,7 @@ async def test_ai_client_span_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -4911,7 +4911,7 @@ async def test_ai_client_span_streaming_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -5196,7 +5196,7 @@ async def test_error_in_responses_async_api( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -5348,7 +5348,7 @@ def test_streaming_responses_api( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5473,7 +5473,7 @@ async def test_streaming_responses_api_async( ], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -5590,7 +5590,7 @@ def test_empty_tools_in_chat_completion( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5668,7 
+5668,7 @@ def test_openai_message_role_mapping( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5733,7 +5733,7 @@ def test_openai_message_truncation( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5825,7 +5825,7 @@ def test_streaming_chat_completion_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -5933,7 +5933,7 @@ async def test_streaming_chat_completion_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") @@ -6042,7 +6042,7 @@ def test_streaming_responses_api_ttft( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = OpenAI(api_key="z") @@ -6121,7 +6121,7 @@ async def test_streaming_responses_api_ttft_async( sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) client = AsyncOpenAI(api_key="z") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 60f88cd7f4..9cdb3ea6f8 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -186,7 +186,7 @@ 
async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -249,7 +249,7 @@ async def test_agent_invocation_span_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -404,7 +404,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -573,7 +573,7 @@ async def test_agent_invocation_span( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -762,7 +762,7 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span") @@ -790,7 +790,7 @@ async def test_client_span_custom_model( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -840,7 +840,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", 
"transaction") @@ -900,7 +900,7 @@ def test_agent_invocation_span_sync_no_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -1051,7 +1051,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -1200,7 +1200,7 @@ def test_agent_invocation_span_sync( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -1449,7 +1449,7 @@ async def test_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("transaction", "span") @@ -1482,7 +1482,7 @@ async def test_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -1619,7 +1619,7 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("transaction", "span") @@ -1652,7 +1652,7 @@ async def test_max_turns_before_handoff_span( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() 
@@ -1756,7 +1756,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("transaction", "span") @@ -1970,7 +1970,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2458,7 +2458,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -2512,7 +2512,7 @@ def simple_test_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2566,7 +2566,7 @@ async def test_error_handling( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("event", "span", "transaction") @@ -2613,7 +2613,7 @@ async def test_error_handling( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2689,7 +2689,7 @@ async def test_error_captures_input_data( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("event", "span") @@ 
-2732,7 +2732,7 @@ async def test_error_captures_input_data( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2785,7 +2785,7 @@ async def test_span_status_error( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("event", "transaction", "span") @@ -2814,7 +2814,7 @@ async def test_span_status_error( LoggingIntegration(event_level=logging.CRITICAL), ], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -2933,7 +2933,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -2979,7 +2979,7 @@ async def test_mcp_tool_execution_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3115,7 +3115,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3154,7 +3154,7 @@ async def test_mcp_tool_execution_with_error( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3287,7 
+3287,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3325,7 +3325,7 @@ async def test_mcp_tool_execution_without_pii( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3387,7 +3387,7 @@ async def test_multiple_agents_asyncio( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3417,7 +3417,7 @@ async def run(): sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3567,7 +3567,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3609,7 +3609,7 @@ def failing_tool(message: str) -> str: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3712,7 +3712,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3752,7 +3752,7 @@ async def test_invoke_agent_span_includes_usage_data( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3848,7 +3848,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -3880,7 +3880,7 @@ async def test_ai_client_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -3971,7 +3971,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4002,7 +4002,7 @@ async def test_ai_client_span_response_model_with_chat_completions( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4129,7 +4129,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4167,7 +4167,7 @@ def calculator(a: int, b: int) -> int: 
integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4261,7 +4261,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4306,7 +4306,7 @@ async def test_invoke_agent_span_includes_response_model( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4441,7 +4441,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4485,7 +4485,7 @@ def calculator(a: int, b: int) -> int: integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -4795,7 +4795,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -4843,7 +4843,7 @@ async def test_conversation_id_on_all_spans( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() 
@@ -4984,7 +4984,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -5026,7 +5026,7 @@ def simple_tool(message: str) -> str: sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -5095,7 +5095,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span", "transaction") @@ -5134,7 +5134,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index b2dfe76988..5cea5063ff 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -68,7 +68,7 @@ async def test_agent_run_async( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -150,7 +150,7 @@ async def test_agent_run_async_model_error( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) def failing_model(messages, info): @@ -205,7 +205,7 @@ async 
def test_agent_run_async_usage_data( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -269,7 +269,7 @@ def test_agent_run_sync( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -336,7 +336,7 @@ def test_agent_run_sync_model_error( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) def failing_model(messages, info): @@ -391,7 +391,7 @@ async def test_agent_run_stream( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -478,7 +478,7 @@ async def test_agent_run_stream_events( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Consume all events @@ -545,7 +545,7 @@ async def test_agent_with_tools( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -651,7 +651,7 @@ async def test_agent_with_tool_model_retry( ], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) retries = 0 @@ -790,7 +790,7 @@ async def test_agent_with_tool_validation_error( ], traces_sample_rate=1.0, send_default_pii=True, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -910,7 +910,7 @@ async def test_agent_with_tools_streaming( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -994,7 +994,7 @@ async def test_model_settings( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent_with_settings = get_test_agent_with_settings() @@ -1068,7 +1068,7 @@ async def test_system_prompt_attribute( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1147,7 +1147,7 @@ async def test_error_handling( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1190,7 +1190,7 @@ async def test_without_pii( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -1244,7 +1244,7 @@ async def test_without_pii_tools( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=False, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1304,7 +1304,7 @@ async def test_multiple_agents_concurrent( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - 
_experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1363,7 +1363,7 @@ async def test_message_history( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Second message with history @@ -1444,7 +1444,7 @@ async def test_gen_ai_system( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1500,7 +1500,7 @@ async def test_include_prompts_false( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, # Even with PII enabled, prompts should not be captured - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1554,7 +1554,7 @@ async def test_include_prompts_true( integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1608,7 +1608,7 @@ async def test_include_prompts_false_with_tools( integrations=[PydanticAIIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1669,7 +1669,7 @@ async def test_include_prompts_requires_pii( integrations=[PydanticAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, # PII disabled - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -1784,7 +1784,7 
@@ async def mock_map_tool_result_part(part): integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2080,7 +2080,7 @@ async def test_invoke_agent_with_list_user_prompt( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2146,7 +2146,7 @@ async def test_invoke_agent_with_instructions( integrations=[PydanticAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2307,7 +2307,7 @@ async def test_usage_data_partial( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2358,7 +2358,7 @@ async def test_agent_data_from_scope( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2397,7 +2397,7 @@ async def test_available_tools_without_description( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -2454,7 +2454,7 @@ async def test_output_with_tool_calls( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) test_agent = get_test_agent() @@ -2525,7 
+2525,7 @@ async def test_message_formatting_with_different_parts( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) # Create message history with different part types @@ -2651,7 +2651,7 @@ async def test_agent_without_name( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -2849,7 +2849,7 @@ def test_tool(x: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -3775,7 +3775,7 @@ async def test_binary_content_encoding_image( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -3830,7 +3830,7 @@ async def test_binary_content_encoding_mixed_content( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -3901,7 +3901,7 @@ async def test_binary_content_in_agent_run( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) binary_content = BinaryContent( @@ -3957,7 +3957,7 @@ async def test_set_usage_data_with_cache_tokens( sentry_init( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ 
-4053,7 +4053,7 @@ def test_image_url_base64_content_in_span( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) found_image = False @@ -4157,7 +4157,7 @@ async def test_invoke_agent_image_url( integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) agent = Agent("test", name="test_image_url_agent") @@ -4233,7 +4233,7 @@ def multiply_numbers(a: int, b: int) -> int: integrations=[PydanticAIIntegration()], traces_sample_rate=1.0, send_default_pii=True, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: From 9b4ad4bd5bc4746447d52df97a2d88b0e34f36c3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:04:12 +0200 Subject: [PATCH 64/84] add parameter --- sentry_sdk/consts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index d2b4cd89af..0a58292d6d 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -1218,6 +1218,7 @@ def __init__( before_send_metric: "Optional[Callable[[Metric, Hint], Optional[Metric]]]" = None, org_id: "Optional[str]" = None, strict_trace_continuation: bool = False, + stream_gen_ai_spans: bool = False, ) -> None: """Initialize the Sentry SDK with the given parameters. All parameters described here can be used in a call to `sentry_sdk.init()`. @@ -1633,6 +1634,9 @@ def __init__( but you can provide it explicitly for self-hosted and Relay setups. This value is used for trace propagation and for features like `strict_trace_continuation`. + :param stream_gen_ai_spans: When set, generative AI spans are sent in a new transport format to + reduce downstream data loss. 
+ :param _experiments: """ pass From 5889ad968a404e82e40a58b5d3e3147febe161da Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:06:55 +0200 Subject: [PATCH 65/84] update tracing --- sentry_sdk/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 96029f1f58..6cf2527fe3 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1042,7 +1042,7 @@ def finish( finished_spans = [] has_gen_ai_span = False - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): for span in self._span_recorder.spans: if span.timestamp is None: continue From c948c14af1ef232d7653444d74683c46a85bbff5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:12:15 +0200 Subject: [PATCH 66/84] update to non-experimental option --- sentry_sdk/ai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 4bd65ced76..fb9edcd335 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -742,7 +742,7 @@ def truncate_and_annotate_messages( max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": client = sentry_sdk.get_client() - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): return messages if not messages: @@ -766,7 +766,7 @@ def truncate_and_annotate_embedding_inputs( max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": client = sentry_sdk.get_client() - if client.options["_experiments"].get("stream_gen_ai_spans", False): + if client.options.get("stream_gen_ai_spans", False): return messages if not messages: From cf04adba9c7a6d960d1abb16851d316160a40665 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:14:02 +0200 Subject: [PATCH 
67/84] update more tests --- tests/tracing/test_decorator.py | 8 ++++---- tests/tracing/test_misc.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tracing/test_decorator.py b/tests/tracing/test_decorator.py index 8d7c97fdbf..4e0c6cc1a9 100644 --- a/tests/tracing/test_decorator.py +++ b/tests/tracing/test_decorator.py @@ -169,7 +169,7 @@ def my_agent(): if stream_gen_ai_spans: sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) items = capture_items("span") @@ -251,7 +251,7 @@ def my_agent(): else: sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) events = capture_events() @@ -361,7 +361,7 @@ def my_agent(): sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: @@ -537,7 +537,7 @@ def my_agent(*args, **kwargs): sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: diff --git a/tests/tracing/test_misc.py b/tests/tracing/test_misc.py index 1119f42461..1066bcb709 100644 --- a/tests/tracing/test_misc.py +++ b/tests/tracing/test_misc.py @@ -653,7 +653,7 @@ def test_conversation_id_propagates_to_span_with_gen_ai_op( """Span with gen_ai.* op should get conversation_id.""" sentry_init( traces_sample_rate=1.0, - _experiments={"stream_gen_ai_spans": stream_gen_ai_spans}, + stream_gen_ai_spans=stream_gen_ai_spans, ) if stream_gen_ai_spans: From 398559b8d8b87ed712b52651dbbbc5bdc4ad94b9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 12 May 2026 20:32:39 +0200 Subject: [PATCH 68/84] restore legitimate test --- .../openai_agents/test_openai_agents.py | 589 ++++++++++++------ 1 file changed, 414 
insertions(+), 175 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index aa2dcab76e..4752ac0376 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1679,13 +1679,16 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( sentry_init, capture_events, + capture_items, test_agent, get_model_response, responses_tool_call_model_responses, + stream_gen_ai_spans, ): """ Test tool execution span creation. @@ -1743,195 +1746,431 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - with patch.object( - agent_with_tool.model._client._client, - "send", - side_effect=[tool_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, + if stream_gen_ai_spans: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) + + items = capture_items("transaction", "span") + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, 
ai_client_span2 = ( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL ) - events = capture_events() + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) - await agents.Runner.run( - agent_with_tool, - "Please use the simple test tool", - run_config=test_run_config, + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert 
agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() ) - (transaction,) = events - spans = transaction["spans"] - agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT) - ai_client_span1, ai_client_span2 = ( - span for span in spans if span["op"] == OP.GEN_AI_CHAT - ) - tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) - - available_tool = { - "name": "simple_test_tool", - "description": "A simple tool", - "params_json_schema": { - "properties": {"message": {"title": "Message", "type": "string"}}, - "required": ["message"], - "title": "simple_test_tool_args", - "type": "object", - "additionalProperties": False, - }, - "on_invoke_tool": mock.ANY, - "strict_json_schema": True, - "is_enabled": True, - } - - if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): - available_tool.update( - {"tool_input_guardrails": None, "tool_output_guardrails": None} + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert 
ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] + + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert ( + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" ) + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 8, - ): - 
available_tool["needs_approval"] = False - if parse_version(OPENAI_AGENTS_VERSION) >= ( - 0, - 9, - 0, - ): - available_tool.update( - { - "timeout_seconds": None, - "timeout_behavior": "error_as_result", - "timeout_error_function": None, - } + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() ) - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - - assert agent_span["description"] == "invoke_agent test_agent" - assert agent_span["origin"] == "auto.ai.openai_agents" - assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - - agent_span_available_tool = json.loads( - agent_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) - - assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 - assert agent_span["data"]["gen_ai.system"] == "openai" - - assert ai_client_span1["description"] == "chat gpt-4" - assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span1["data"]["gen_ai.system"] == "openai" - assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" - - ai_client_span1_available_tool = json.loads( - ai_client_span1["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span1_available_tool[k] == v for k, v in available_tool.items() - ) + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + 
"role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Please use the simple test tool"} - ], - }, - ] - ) - assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 - assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 - assert 
ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 - - tool_call = { - "arguments": '{"message": "hello"}', - "call_id": "call_123", - "name": "simple_test_tool", - "type": "function_call", - "id": "call_123", - "status": None, - } - - if OPENAI_VERSION >= (2, 25, 0): - tool_call["namespace"] = None - - assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ - tool_call - ] + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) - assert tool_span["description"] == "execute_tool simple_test_tool" - assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" - assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" - - tool_span_available_tool = json.loads( - tool_span["data"]["gen_ai.request.available_tools"] - )[0] - assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - - assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 - assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" - assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 - assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 - assert tool_span["data"]["gen_ai.system"] == "openai" - assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" - assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' - assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" - assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" - assert ai_client_span2["description"] == "chat gpt-4" - assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" - - ai_client_span2_available_tool = 
json.loads( - ai_client_span2["data"]["gen_ai.request.available_tools"] - )[0] - assert all( - ai_client_span2_available_tool[k] == v for k, v in available_tool.items() - ) + events = capture_events() - assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "role": "tool", - "content": [ - { - "call_id": "call_123", - "output": "Tool executed with: hello", - "type": "function_call_output", - } - ], + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["op"] == OP.GEN_AI_CHAT + ) + tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == 
"invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert 
ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [ + tool_call ] - ) - assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ( - ai_client_span2["data"]["gen_ai.response.text"] - == "Task completed using the tool" - ) - assert ai_client_span2["data"]["gen_ai.system"] == "openai" - assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 - assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 - assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["data"]["gen_ai.request.available_tools"] + )[0] + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert 
tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["data"]["gen_ai.request.available_tools"] + )[0] + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 @pytest.mark.asyncio From ec57859f5bfe0a4b50c8993d56a3415564060a1f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:42:29 +0200 Subject: [PATCH 69/84] test(langchain): Inline global state --- .../integrations/langchain/test_langchain.py | 100 ++++++++++-------- 1 file changed, 58 insertions(+), 42 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py 
b/tests/integrations/langchain/test_langchain.py index 240a78e2cc..336be2fb1e 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -237,26 +237,6 @@ def get_word_length(word: str) -> int: return len(word) -global stream_result_mock # type: Mock -global llm_type # type: str - - -class MockOpenAI(ChatOpenAI): - def _stream( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> Iterator[ChatGenerationChunk]: - for x in stream_result_mock(): - yield x - - @property - def _llm_type(self) -> str: - return llm_type - - def test_langchain_text_completion( sentry_init, capture_events, @@ -1488,8 +1468,22 @@ def test_langchain_openai_tools_agent_stream_with_config( def test_langchain_error(sentry_init, capture_events): - global llm_type - llm_type = "acme-llm" + class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) + + for x in stream_result_mock(): + yield x + + @property + def _llm_type(self) -> str: + return "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], @@ -1508,8 +1502,6 @@ def test_langchain_error(sentry_init, capture_events): MessagesPlaceholder(variable_name="agent_scratchpad"), ] ) - global stream_result_mock - stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, @@ -1527,8 +1519,22 @@ def test_langchain_error(sentry_init, capture_events): def test_span_status_error(sentry_init, capture_events): - global llm_type - llm_type = "acme-llm" + class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: 
Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) + + for x in stream_result_mock(): + yield x + + @property + def _llm_type(self) -> str: + return "acme-llm" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], @@ -1547,8 +1553,6 @@ def test_span_status_error(sentry_init, capture_events): MessagesPlaceholder(variable_name="agent_scratchpad"), ] ) - global stream_result_mock - stream_result_mock = Mock(side_effect=ValueError("API rate limit error")) llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, @@ -1781,8 +1785,32 @@ def test_langchain_callback_list_existing_callback(sentry_init): def test_langchain_message_role_mapping(sentry_init, capture_events): """Test that message roles are properly normalized in langchain integration.""" - global llm_type - llm_type = "openai-chat" + + class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + stream_result_mock = Mock( + side_effect=[ + [ + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk(content="Test response"), + ), + ] + ] + ) + + for x in stream_result_mock(): + yield x + + @property + def _llm_type(self) -> str: + return "openai-chat" sentry_init( integrations=[LangchainIntegration(include_prompts=True)], @@ -1799,18 +1827,6 @@ def test_langchain_message_role_mapping(sentry_init, capture_events): ] ) - global stream_result_mock - stream_result_mock = Mock( - side_effect=[ - [ - ChatGenerationChunk( - type="ChatGenerationChunk", - message=AIMessageChunk(content="Test response"), - ), - ] - ] - ) - llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, From 7886629e3e1d240ab43f146063ca36b4d6c4ec3b Mon Sep 17 00:00:00 
2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:46:45 +0200 Subject: [PATCH 70/84] add parameterization --- tests/integrations/langchain/test_langchain.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 22b11e83aa..be19e9a790 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -237,6 +237,7 @@ def get_word_length(word: str) -> int: return len(word) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_langchain_text_completion( sentry_init, capture_events, From b7811723c19ecd7b916614ac63c8451f3b3aeef7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 07:54:11 +0200 Subject: [PATCH 71/84] restore langgraph test --- .../integrations/langgraph/test_langgraph.py | 168 +++++++++++++----- 1 file changed, 127 insertions(+), 41 deletions(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index c7032b009b..80a20fb617 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -242,6 +242,7 @@ def original_compile(self, *args, **kwargs): assert "calculator" in tools_data +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -251,14 +252,21 @@ def original_compile(self, *args, **kwargs): (False, False), ], ) -def test_pregel_invoke(sentry_init, capture_events, send_default_pii, include_prompts): +def test_pregel_invoke( + sentry_init, + capture_events, + capture_items, + send_default_pii, + include_prompts, + stream_gen_ai_spans, +): """Test Pregel.invoke() wrapper creates proper invoke_agent span.""" sentry_init( integrations=[LanggraphIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, + 
stream_gen_ai_spans=stream_gen_ai_spans, ) - events = capture_events() test_state = { "messages": [ @@ -289,57 +297,135 @@ def original_invoke(self, *args, **kwargs): ] return {"messages": new_messages} - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) + if stream_gen_ai_spans: + items = capture_items("transaction", "span") - assert result is not None + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - tx = events[0] - assert tx["type"] == "transaction" + assert result is not None - invoke_spans = [ - span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT - ] - assert len(invoke_spans) == 1 + spans = [item.payload for item in items if item.type == "span"] + invoke_spans = [ + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ] - invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" - assert invoke_span["origin"] == "auto.ai.langgraph" - assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" - assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" - assert invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - - if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + assert len(invoke_spans) == 1 - request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + invoke_span = invoke_spans[0] - if isinstance(request_messages, str): - import json + assert invoke_span["name"] == "invoke_agent test_graph" + assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" + assert ( + invoke_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + ) + assert invoke_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == 
"test_graph" + assert invoke_span["attributes"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" - request_messages = json.loads(request_messages) - assert len(request_messages) == 1 - assert request_messages[0]["content"] == "Of course! How can I assist you?" + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["attributes"] - response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert response_text == expected_assistant_response + request_messages = invoke_span["attributes"][ + SPANDATA.GEN_AI_REQUEST_MESSAGES + ] - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] - tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] - if isinstance(tool_calls_data, str): - import json + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Hello, can you help me?" + assert request_messages[1]["content"] == "Of course! How can I assist you?" 
+ + response_text = invoke_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["attributes"] + tool_calls_data = invoke_span["attributes"][ + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS + ] + + if isinstance(tool_calls_data, str): + import json - tool_calls_data = json.loads(tool_calls_data) + tool_calls_data = json.loads(tool_calls_data) - assert len(tool_calls_data) == 1 - assert tool_calls_data[0]["id"] == "call_test_123" - assert tool_calls_data[0]["function"]["name"] == "search_tool" + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get( + "attributes", {} + ) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "attributes", {} + ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get("data", {}) + events = capture_events() + + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) + + assert result is not None + + tx = events[0] + assert tx["type"] == "transaction" + + invoke_spans = [ + span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT + ] + + assert len(invoke_spans) == 1 + + invoke_span = invoke_spans[0] + + assert invoke_span["description"] == "invoke_agent test_graph" + assert invoke_span["origin"] == "auto.ai.langgraph" + assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" + assert invoke_span["data"][SPANDATA.GEN_AI_PIPELINE_NAME] == "test_graph" + assert 
invoke_span["data"][SPANDATA.GEN_AI_AGENT_NAME] == "test_graph" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in invoke_span["data"] + + request_messages = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + + if isinstance(request_messages, str): + import json + + request_messages = json.loads(request_messages) + assert len(request_messages) == 1 + assert request_messages[0]["content"] == "Of course! How can I assist you?" + + response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == expected_assistant_response + + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in invoke_span["data"] + tool_calls_data = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + if isinstance(tool_calls_data, str): + import json + + tool_calls_data = json.loads(tool_calls_data) + + assert len(tool_calls_data) == 1 + assert tool_calls_data[0]["id"] == "call_test_123" + assert tool_calls_data[0]["function"]["name"] == "search_tool" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in invoke_span.get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in invoke_span.get( + "data", {} + ) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) From b618cc8be08ea91f7c01cf21d36c52d490da663a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 08:13:01 +0200 Subject: [PATCH 72/84] update test --- tests/integrations/langgraph/test_langgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 80a20fb617..f308127276 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -337,7 +337,7 @@ def original_invoke(self, *args, **kwargs): import json request_messages = 
json.loads(request_messages) - assert len(request_messages) == 1 + assert len(request_messages) == 2 assert request_messages[0]["content"] == "Hello, can you help me?" assert request_messages[1]["content"] == "Of course! How can I assist you?" From e85dffe2ab7c7495170f998817f55dba3c737a04 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 09:58:22 +0200 Subject: [PATCH 73/84] remove None conversion --- sentry_sdk/client.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index d198b7f854..39d8a4dea4 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -149,12 +149,6 @@ def _serialized_v1_attribute_to_serialized_v2_attribute( "type": "string", } - if attribute_value is None: - return { - "value": "None", - "type": "string", - } - return None From f8f98c16f38abba9dd0d0a042c054636c17ea303 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:01:57 +0200 Subject: [PATCH 74/84] update test with None attribute assertion --- tests/integrations/openai_agents/test_openai_agents.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9cdb3ea6f8..bf44562b14 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -3140,7 +3140,6 @@ async def test_mcp_tool_execution_with_error( assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["attributes"]["gen_ai.tool.output"] == "None" # Verify error status was set assert mcp_tool_span["status"] == "error" From b46fd5f087f1b0203b054b675f45b27742ad6bd7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:06:41 +0200 Subject: 
[PATCH 75/84] mostly whitespace test cleanup --- .../integrations/anthropic/test_anthropic.py | 3 - .../google_genai/test_google_genai.py | 29 +- .../huggingface_hub/test_huggingface_hub.py | 1 - .../integrations/langchain/test_langchain.py | 49 -- .../integrations/langgraph/test_langgraph.py | 35 -- tests/integrations/litellm/test_litellm.py | 27 - tests/integrations/openai/test_openai.py | 487 +++++++++++------- .../pydantic_ai/test_pydantic_ai.py | 25 +- 8 files changed, 319 insertions(+), 337 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index f9f6241997..31f487aef2 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3675,7 +3675,6 @@ def test_anthropic_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -3760,7 +3759,6 @@ async def test_anthropic_message_truncation_async( for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 chat_span = chat_spans[0] @@ -3794,7 +3792,6 @@ async def test_anthropic_message_truncation_async( chat_spans = [ span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 chat_span = chat_spans[0] diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 94bfea91fd..79318eaea5 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -427,7 +427,6 @@ def get_weather(location: str) -> str: tools_data_str = invoke_span["attributes"][ SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS ] - # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert 
len(tools_data) == 2 @@ -452,7 +451,6 @@ def get_weather(location: str) -> str: # Check that tools are recorded (data is serialized as a string) tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 @@ -701,14 +699,10 @@ def test_streaming_generate_content( # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" # Response text is stored as a JSON string - if stream_gen_ai_spans: - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - else: - chat_response_text = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) + assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) @@ -762,14 +756,9 @@ def test_streaming_generate_content( # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! How can I help you today?" 
# Response text is stored as a JSON string - if stream_gen_ai_spans: - chat_response_text = json.loads( - chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) - else: - chat_response_text = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] - ) + chat_response_text = json.loads( + chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) @@ -1592,7 +1581,6 @@ def test_embed_content( (event,) = events assert event["type"] == "transaction" - assert event["transaction"] == "google_genai_embeddings" # Should have 1 span for embeddings @@ -1697,7 +1685,6 @@ def test_embed_content_string_input( # Check that single string is handled correctly input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics @@ -2064,7 +2051,6 @@ async def test_async_embed_content_string_input( input_texts = json.loads( embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] ) - assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics @@ -2088,7 +2074,6 @@ async def test_async_embed_content_string_input( # Check that single string is handled correctly input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) - assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index dc9d7925ff..5417cec250 100644 --- 
a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -938,7 +938,6 @@ def test_chat_completion_streaming( "thread.id": mock.ANY, "thread.name": mock.ANY, } - # usage is not available in older versions of the library if HF_VERSION and HF_VERSION >= (0, 26, 0): expected_data["gen_ai.usage.input_tokens"] = 183 diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index be19e9a790..79ecc7e96b 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -305,11 +305,9 @@ def test_langchain_text_completion( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["name"] == "text_completion gpt-3.5-turbo" assert llm_span["attributes"]["gen_ai.system"] == "openai" assert llm_span["attributes"]["gen_ai.function_id"] == "my-snazzy-pipeline" @@ -340,11 +338,9 @@ def test_langchain_text_completion( for span in tx.get("spans", []) if span.get("op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["description"] == "text_completion gpt-3.5-turbo" assert llm_span["data"]["gen_ai.system"] == "openai" assert llm_span["data"]["gen_ai.function_id"] == "my-snazzy-pipeline" @@ -587,7 +583,6 @@ def test_langchain_create_agent( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -662,7 +657,6 @@ def test_langchain_create_agent( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -826,7 +820,6 @@ def test_tool_execution_span( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" 
@@ -839,7 +832,6 @@ def test_tool_execution_span( tool_exec_spans = list( x for x in spans if x["attributes"]["sentry.op"] == "gen_ai.execute_tool" ) - assert len(tool_exec_spans) == 1 tool_exec_span = tool_exec_spans[0] @@ -934,13 +926,11 @@ def test_tool_execution_span( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") assert len(chat_spans) == 2 - tool_exec_spans = list( x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool" ) @@ -1091,7 +1081,6 @@ def test_langchain_openai_tools_agent_no_prompts( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1200,7 +1189,6 @@ def test_langchain_openai_tools_agent_no_prompts( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1358,7 +1346,6 @@ def test_langchain_openai_tools_agent( list(agent_executor.stream({"input": "How many letters in the word eudca"})) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1474,7 +1461,6 @@ def test_langchain_openai_tools_agent( list(agent_executor.stream({"input": "How many letters in the word eudca"})) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1641,7 +1627,6 @@ def test_langchain_openai_tools_agent_with_config( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1668,7 +1653,6 @@ def test_langchain_openai_tools_agent_with_config( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1761,7 +1745,6 @@ def 
test_langchain_openai_tools_agent_stream_no_prompts( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -1872,7 +1855,6 @@ def test_langchain_openai_tools_agent_stream_no_prompts( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2035,7 +2017,6 @@ def test_langchain_openai_tools_agent_stream( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2161,7 +2142,6 @@ def test_langchain_openai_tools_agent_stream( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2329,7 +2309,6 @@ def test_langchain_openai_tools_agent_stream_with_config( ) tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2356,7 +2335,6 @@ def test_langchain_openai_tools_agent_stream_with_config( ) tx = events[0] - assert tx["type"] == "transaction" assert tx["contexts"]["trace"]["origin"] == "manual" @@ -2496,10 +2474,8 @@ def _llm_type(self) -> str: (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" - (transaction,) = (item.payload for item in items if item.type == "transaction") else: events = capture_events() @@ -3164,7 +3140,6 @@ def test_langchain_embeddings_sync( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3226,7 +3201,6 @@ def test_langchain_embeddings_sync( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 
embeddings_span = embeddings_spans[0] @@ -3311,11 +3285,9 @@ def test_langchain_embeddings_embed_query( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["attributes"]["gen_ai.operation.name"] == "embeddings" assert ( embeddings_span["attributes"]["gen_ai.request.model"] @@ -3369,11 +3341,9 @@ def test_langchain_embeddings_embed_query( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] - assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" assert ( embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" @@ -3458,7 +3428,6 @@ async def mock_aembed_documents(self, texts): for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3524,7 +3493,6 @@ async def mock_aembed_documents(self, texts): for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3607,7 +3575,6 @@ async def mock_aembed_query(self, text): for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3653,7 +3620,6 @@ async def mock_aembed_query(self, text): for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3719,7 +3685,6 @@ def test_langchain_embeddings_no_model_name( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3762,7 +3727,6 @@ def test_langchain_embeddings_no_model_name( for span in tx.get("spans", []) if span.get("op") == 
"gen_ai.embeddings" ] - assert len(embeddings_spans) == 1 embeddings_span = embeddings_spans[0] @@ -3900,7 +3864,6 @@ def test_langchain_embeddings_multiple_providers( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 @@ -3951,7 +3914,6 @@ def test_langchain_embeddings_multiple_providers( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - # Should have 2 spans, one for each provider assert len(embeddings_spans) == 2 @@ -4054,7 +4016,6 @@ def test_langchain_embeddings_multiple_calls( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 3 # Verify all spans have proper data @@ -4109,7 +4070,6 @@ def test_langchain_embeddings_multiple_calls( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 3 # Verify all spans have proper data @@ -4173,9 +4133,7 @@ def test_langchain_embeddings_span_hierarchy( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - tx = next(item.payload for item in items if item.type == "transaction") - custom_spans = [ span for span in tx.get("spans", []) if span.get("op") == "custom" ] @@ -4220,7 +4178,6 @@ def test_langchain_embeddings_span_hierarchy( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - custom_spans = [ span for span in tx.get("spans", []) if span.get("op") == "custom" ] @@ -4290,7 +4247,6 @@ def test_langchain_embeddings_with_list_and_string_inputs( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 2 # Both should have input data captured as lists @@ -4342,7 +4298,6 @@ def test_langchain_embeddings_with_list_and_string_inputs( for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" ] - assert len(embeddings_spans) == 2 # Both should have 
input data captured as lists @@ -4412,11 +4367,9 @@ def test_langchain_response_model_extraction( for span in spans if span["attributes"].get("sentry.op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: @@ -4455,11 +4408,9 @@ def test_langchain_response_model_extraction( for span in tx.get("spans", []) if span.get("op") == "gen_ai.text_completion" ] - assert len(llm_spans) > 0 llm_span = llm_spans[0] - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" if expected_model is not None: diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 6dd5c3cace..f8df60739f 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -186,7 +186,6 @@ def original_compile(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CREATE_AGENT ] - assert len(agent_spans) == 1 agent_span = agent_spans[0] @@ -200,7 +199,6 @@ def original_compile(self, *args, **kwargs): assert SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in agent_span["attributes"] tools_data = agent_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] - assert tools_data == ["search_tool", "calculator"] assert len(tools_data) == 2 assert "search_tool" in tools_data @@ -223,7 +221,6 @@ def original_compile(self, *args, **kwargs): agent_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_CREATE_AGENT ] - assert len(agent_spans) == 1 agent_span = agent_spans[0] @@ -312,11 +309,9 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent test_graph" assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" assert ( 
@@ -381,11 +376,9 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent test_graph" assert invoke_span["origin"] == "auto.ai.langgraph" assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" @@ -494,11 +487,9 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["name"] == "invoke_agent async_graph" assert invoke_span["attributes"]["sentry.origin"] == "auto.ai.langgraph" assert ( @@ -549,11 +540,9 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] - assert invoke_span["description"] == "invoke_agent async_graph" assert invoke_span["origin"] == "auto.ai.langgraph" assert invoke_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "invoke_agent" @@ -622,7 +611,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -640,7 +628,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -687,7 +674,6 @@ async def run_error_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -701,7 +687,6 @@ async def run_error_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -797,7 +782,6 @@ def original_invoke(self, *args, **kwargs): 
for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -825,7 +809,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -909,7 +892,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -939,7 +921,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1028,7 +1009,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1055,7 +1035,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1152,7 +1131,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1176,7 +1154,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1270,7 +1247,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1291,7 +1267,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = 
invoke_spans[0] @@ -1371,7 +1346,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1398,7 +1372,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1483,7 +1456,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1507,7 +1479,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1602,7 +1573,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1628,7 +1598,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1725,7 +1694,6 @@ async def run_test(): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1748,7 +1716,6 @@ async def run_test(): invoke_spans = [ span for span in tx["spans"] if span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_agent_span = invoke_spans[0] @@ -1870,7 +1837,6 @@ def original_invoke(self, *args, **kwargs): for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] @@ -1895,7 +1861,6 @@ def original_invoke(self, *args, **kwargs): invoke_spans = [ span for span in tx["spans"] if 
span["op"] == OP.GEN_AI_INVOKE_AGENT ] - assert len(invoke_spans) == 1 invoke_span = invoke_spans[0] diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index a0120cd7b5..22663f9472 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -688,7 +688,6 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -710,7 +709,6 @@ def test_embeddings_create( ) # Check that embeddings input is captured (it's JSON serialized) embeddings_input = span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] - assert json.loads(embeddings_input) == ["Hello, world!"] else: events = capture_events() @@ -731,7 +729,6 @@ def test_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -808,7 +805,6 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -816,7 +812,6 @@ async def test_async_embeddings_create( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -852,7 +847,6 @@ async def test_async_embeddings_create( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -930,7 +924,6 @@ def test_embeddings_create_with_list_input( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -963,7 +956,6 @@ def test_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists 
assert response is not None - assert len(events) == 1 (event,) = events @@ -1034,7 +1026,6 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -1042,7 +1033,6 @@ async def test_async_embeddings_create_with_list_input( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -1076,7 +1066,6 @@ async def test_async_embeddings_create_with_list_input( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -1145,7 +1134,6 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - spans = [item.payload for item in items if item.type == "span"] spans = list( x @@ -1153,7 +1141,6 @@ def test_embeddings_no_pii( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -1179,7 +1166,6 @@ def test_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -1251,7 +1237,6 @@ async def test_async_embeddings_no_pii( if x["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(spans) == 1 span = spans[0] @@ -1278,7 +1263,6 @@ async def test_async_embeddings_no_pii( # Response is processed by litellm, so just check it exists assert response is not None - assert len(events) == 1 (event,) = events @@ -2391,7 +2375,6 @@ def test_litellm_message_truncation( for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] - assert len(chat_spans) > 0 chat_span = chat_spans[0] @@ -2873,7 +2856,6 @@ 
async def test_async_binary_content_encoding_mixed_content( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) @@ -2901,7 +2883,6 @@ async def test_async_binary_content_encoding_mixed_content( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) @@ -2982,10 +2963,8 @@ def test_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3010,10 +2989,8 @@ def test_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( @@ -3101,10 +3078,8 @@ async def test_async_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3130,10 +3105,8 @@ async def test_async_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( diff --git a/tests/integrations/openai/test_openai.py 
b/tests/integrations/openai/test_openai.py index af0932eeb9..6c113078a3 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -165,7 +165,6 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -207,7 +206,6 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -325,7 +323,6 @@ def test_nonstreaming_chat_completion( ) assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -387,7 +384,6 @@ def test_nonstreaming_chat_completion( ) assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -490,7 +486,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -529,7 +524,6 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -645,7 +639,6 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == 
"gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -704,7 +697,6 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -862,7 +854,6 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -915,7 +906,6 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -1488,9 +1478,7 @@ def test_streaming_chat_completion( response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -1567,9 +1555,7 @@ def test_streaming_chat_completion( response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) ) - assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -1736,7 +1722,6 @@ async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -1791,7 +1776,6 @@ async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" 
span = tx["spans"][0] @@ -1967,7 +1951,6 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2065,7 +2048,6 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] @@ -2694,7 +2676,6 @@ async def test_embeddings_create_async( ) param_id = request.node.callspec.id - if ( "string" in param_id and "string_sequence" not in param_id @@ -2745,7 +2726,6 @@ async def test_embeddings_create_async( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id - if ( "string" in param_id and "string_sequence" not in param_id @@ -4975,6 +4955,162 @@ async def test_ai_client_span_streaming_responses_async_api( "thread.id": mock.ANY, "thread.name": mock.ANY, } + + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", 
"content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful 
assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["attributes"] == expected_data else: events = capture_events() @@ -5023,163 +5159,160 @@ async def test_ai_client_span_streaming_responses_async_api( "thread.name": mock.ANY, } - param_id = request.node.callspec.id - if "string" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "string" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - } - ] - ), - "gen_ai.request.messages": safe_serialize( - ["How do I check if a Python object is an instance of a class?"] - ), - } - ) - elif "blocks_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - 
"gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [{"type": "text", "content": "You are a helpful assistant."}] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "blocks" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id and ( - instructions is None or isinstance(instructions, Omit) - ): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif "parts_no_type" in param_id: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", 
- }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] - ), - } - ) - elif instructions is None or isinstance(instructions, Omit): # type: ignore - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) - else: - expected_data.update( - { - "gen_ai.system_instructions": safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ), - "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] - ), - } - ) + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": 
safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + 
"gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) - if stream_gen_ai_spans: - assert spans[0]["attributes"] == expected_data - else: assert spans[0]["data"] == expected_data @@ -5774,7 +5907,6 @@ def test_openai_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -5796,7 +5928,6 @@ def test_openai_message_truncation( assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 5cea5063ff..42a666644e 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -95,7 +95,6 @@ async def test_agent_run_async( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - 
assert len(chat_spans) >= 1 # Check chat span @@ -126,7 +125,6 @@ async def test_agent_run_async( # Find child span types (invoke_agent is the transaction, not a child span) chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # Check chat span @@ -282,8 +280,6 @@ def test_agent_run_sync( assert result is not None assert result.output is not None - spans = [item.payload for item in items if item.type == "span"] - # Verify transaction (transaction,) = (item.payload for item in items if item.type == "transaction") @@ -292,10 +288,10 @@ def test_agent_run_sync( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find span types + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 # Verify streaming flag is False for sync @@ -318,7 +314,6 @@ def test_agent_run_sync( # Find span types chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # Verify streaming flag is False for sync @@ -404,8 +399,6 @@ async def test_agent_run_stream( async for _ in result.stream_output(): pass - spans = [item.payload for item in items if item.type == "span"] - # Verify transaction (transaction,) = (item.payload for item in items if item.type == "transaction") @@ -414,10 +407,10 @@ async def test_agent_run_stream( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.pydantic_ai" # Find chat spans + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming @@ -447,7 +440,6 @@ async def test_agent_run_stream( # Find chat spans chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # Verify streaming flag is True for streaming @@ -501,7 +493,6 @@ async 
def test_agent_run_stream_events( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False @@ -521,7 +512,6 @@ async def test_agent_run_stream_events( # Find chat spans spans = transaction["spans"] chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 # run_stream_events uses run() internally, so streaming should be False @@ -1082,7 +1072,6 @@ async def test_system_prompt_attribute( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -1109,7 +1098,6 @@ async def test_system_prompt_attribute( # The transaction IS the invoke_agent span, check for messages in chat spans instead chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -1332,7 +1320,6 @@ async def run_agent(input_text): results = await asyncio.gather(*[run_agent(f"Input {i}") for i in range(3)]) assert len(results) == 3 - assert len(events) == 3 # Verify each transaction is separate @@ -1393,7 +1380,6 @@ async def test_message_history( # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] @@ -1418,7 +1404,6 @@ async def test_message_history( # Check the second transaction has the full history second_transaction = events[1] spans = second_transaction["spans"] - chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] if chat_spans: @@ -2160,7 +2145,6 @@ async def test_invoke_agent_with_instructions( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -2186,7 +2170,6 @@ async def test_invoke_agent_with_instructions( # The 
transaction IS the invoke_agent span, check for messages in chat spans instead chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -2378,7 +2361,7 @@ async def test_agent_data_from_scope( # Verify agent name is capture (transaction,) = events - # Verify agent name is captured + # Verify agent name is captured assert transaction["transaction"] == "invoke_agent test_scope_agent" @@ -3917,7 +3900,6 @@ async def test_binary_content_in_agent_run( chat_spans = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" ] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] @@ -3934,7 +3916,6 @@ async def test_binary_content_in_agent_run( (transaction,) = events chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"] - assert len(chat_spans) >= 1 chat_span = chat_spans[0] From dde7bf4d4ae1d0413baf0fc2680069e6facf7884 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:10:58 +0200 Subject: [PATCH 76/84] restore type annotations in huggingface_hub tests --- .../huggingface_hub/test_huggingface_hub.py | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 5417cec250..4772eb368f 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -17,7 +17,7 @@ if TYPE_CHECKING: - pass + from typing import Any HF_VERSION = package_version("huggingface-hub") @@ -471,14 +471,14 @@ def mock_hf_chat_completion_api_streaming_tools(httpx_mock): @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_text_generation_api, - 
stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_text_generation_api: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -606,14 +606,14 @@ def test_text_generation( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_text_generation_api_streaming, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_text_generation_api_streaming: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -739,14 +739,14 @@ def test_text_generation_streaming( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -876,14 +876,14 @@ def test_chat_completion( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api_streaming, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: 
"Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_streaming: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, @@ -1015,12 +1015,12 @@ def test_chat_completion_streaming( @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_chat_completion_api_error( - sentry_init, - capture_events, - capture_items, - mock_hf_api_with_errors, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + mock_hf_api_with_errors: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, @@ -1129,12 +1129,12 @@ def test_chat_completion_api_error( @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.httpx_mock(assert_all_requests_were_expected=False) def test_span_status_error( - sentry_init, - capture_events, - capture_items, - mock_hf_api_with_errors, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + mock_hf_api_with_errors: "Any", + stream_gen_ai_spans: "Any", +) -> None: client = get_hf_provider_inference_client() sentry_init( @@ -1197,13 +1197,13 @@ def test_span_status_error( @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api_tools, - stream_gen_ai_spans, + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_tools: "Any", + stream_gen_ai_spans: "Any", ): sentry_init( traces_sample_rate=1.0, @@ -1353,14 +1353,14 @@ def test_chat_completion_with_tools( @pytest.mark.parametrize("send_default_pii", 
[True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming_with_tools( - sentry_init, - capture_events, - capture_items, - send_default_pii, - include_prompts, - mock_hf_chat_completion_api_streaming_tools, - stream_gen_ai_spans, -): + sentry_init: "Any", + capture_events: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_streaming_tools: "Any", + stream_gen_ai_spans: "Any", +) -> None: sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, From 913ec9af4eefb8296bba602e65ade30be9efa9b1 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:29:43 +0200 Subject: [PATCH 77/84] litellm test --- tests/integrations/litellm/test_litellm.py | 103 ++++++--------------- 1 file changed, 29 insertions(+), 74 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index c04619d838..b463387daf 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2325,20 +2325,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2352,78 +2346,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 
0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 From f2bdff5cc6967b30cf08796fbd7eddd92c7a2746 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:31:11 +0200 Subject: [PATCH 78/84] remove whitespace changes --- tests/integrations/litellm/test_litellm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b463387daf..b76980ddd3 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2920,7 +2920,6 @@ def test_binary_content_encoding_uri_type( ) assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -2948,7 +2947,6 @@ def test_binary_content_encoding_uri_type( assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( @@ -3036,10 +3034,8 @@ async def test_async_binary_content_encoding_uri_type( if x["attributes"]["sentry.op"] == OP.GEN_AI_CHAT and x["attributes"]["sentry.origin"] == "auto.ai.litellm" ) - assert 
len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) else: events = capture_events() @@ -3065,10 +3061,8 @@ async def test_async_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] - messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) uri_item = next( From ec26b90f87b22ad048a0024f144143fa5b4cf385 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:31:46 +0200 Subject: [PATCH 79/84] one more whitespace removal --- tests/integrations/litellm/test_litellm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b76980ddd3..aab289b28f 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2944,7 +2944,6 @@ def test_binary_content_encoding_uri_type( for x in event["spans"] if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" ) - assert len(chat_spans) == 1 span = chat_spans[0] messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) From 4ec3ff7e96f6ea4c2c2764b6a0d91eed4b497d08 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 10:48:43 +0200 Subject: [PATCH 80/84] remove truncation per integration instead --- sentry_sdk/ai/utils.py | 8 ---- sentry_sdk/integrations/anthropic.py | 8 +++- sentry_sdk/integrations/google_genai/utils.py | 7 ++- sentry_sdk/integrations/langchain.py | 38 +++++++++++---- sentry_sdk/integrations/langgraph.py | 20 ++++++-- sentry_sdk/integrations/litellm.py | 14 ++++-- sentry_sdk/integrations/openai.py | 46 +++++++++++++++---- .../openai_agents/spans/invoke_agent.py | 7 ++- .../integrations/openai_agents/utils.py | 7 ++- .../pydantic_ai/spans/ai_client.py | 7 ++- .../pydantic_ai/spans/invoke_agent.py | 7 
++- 11 files changed, 127 insertions(+), 42 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index fb9edcd335..8efa077ce5 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -741,10 +741,6 @@ def truncate_and_annotate_messages( scope: "Any", max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS, ) -> "Optional[List[Dict[str, Any]]]": - client = sentry_sdk.get_client() - if client.options.get("stream_gen_ai_spans", False): - return messages - if not messages: return None @@ -765,10 +761,6 @@ def truncate_and_annotate_embedding_inputs( scope: "Any", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, ) -> "Optional[List[Dict[str, Any]]]": - client = sentry_sdk.get_client() - if client.options.get("stream_gen_ai_spans", False): - return messages - if not messages: return None diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index efc2f70ffd..ca9e60e59d 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -438,9 +438,13 @@ def _set_common_input_data( normalized_messages.append(transformed_message) role_normalized_messages = normalize_message_roles(normalized_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - role_normalized_messages, span, scope + messages_data = ( + role_normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(role_normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 25763ebe07..55a5b80233 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -892,9 +892,12 @@ def set_span_data_for_request( if messages: normalized_messages = normalize_message_roles(messages) + client = 
sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 8acf215bfe..4f5a1b4939 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -374,9 +374,15 @@ def on_llm_start( } for prompt in prompts ] + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -463,9 +469,15 @@ def on_chat_model_start( self._normalize_langchain_message(message) ) normalized_messages = normalize_message_roles(normalized_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -992,9 +1004,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if 
client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -1049,9 +1067,13 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index e5ea12b90a..1454d151f4 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -181,9 +181,15 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_input_messages, span, scope + messages_data = ( + normalized_input_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_input_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -234,9 +240,15 @@ async def new_ainvoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) + + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_input_messages, span, scope + 
messages_data = ( + normalized_input_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages( + normalized_input_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 3cff0fbc23..9561bd61f3 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -119,8 +119,11 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: if isinstance(embedding_input, list) else [embedding_input] ) - messages_data = truncate_and_annotate_embedding_inputs( - input_list, span, scope + client = sentry_sdk.get_client() + messages_data = ( + input_list + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs(input_list, span, scope) ) if messages_data is not None: set_data_normalized( @@ -133,9 +136,14 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: # For chat, look for the 'messages' parameter messages = kwargs.get("messages", []) if messages: + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages = _convert_message_parts(messages) - messages_data = truncate_and_annotate_messages(messages, span, scope) + messages_data = ( + messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b3919d1a9d..7bb328741e 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -398,8 +398,13 @@ def _set_responses_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + 
messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -413,8 +418,13 @@ def _set_responses_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -472,8 +482,13 @@ def _set_completions_api_input_data( if isinstance(messages, str): normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -503,8 +518,13 @@ def _set_completions_api_input_data( ] if len(non_system_messages) > 0: normalized_messages = normalize_message_roles(non_system_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if 
messages_data is not None: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False @@ -539,9 +559,14 @@ def _set_embeddings_input_data( set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") normalized_messages = normalize_message_roles([messages]) # type: ignore + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( @@ -560,9 +585,14 @@ def _set_embeddings_input_data( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_embedding_inputs( + normalized_messages, span, scope + ) ) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 27f9fdab25..2346189a96 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -63,9 +63,12 @@ def invoke_agent_span( if len(messages) > 0: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) 
) if messages_data is not None: set_data_normalized( diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index ee504b3496..ea1faefde7 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -173,8 +173,13 @@ def _set_input_data( ) normalized_messages = normalize_message_roles(request_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) + ) if messages_data is not None: set_data_normalized( span, diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py index dc95acad45..e549083fed 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py @@ -182,9 +182,12 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non if formatted_messages: normalized_messages = normalize_message_roles(formatted_messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index ee08ca7036..c507315dcd 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -122,9 
+122,12 @@ def invoke_agent_span( if messages: normalized_messages = normalize_message_roles(messages) + client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + messages_data = ( + normalized_messages + if client.options.get("stream_gen_ai_spans", False) + else truncate_and_annotate_messages(normalized_messages, span, scope) ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False From 962fd656b084deeb5c465d13dd2234c793fa0995 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 13:33:56 +0200 Subject: [PATCH 81/84] update tests with to have more than one input message --- .../integrations/anthropic/test_anthropic.py | 120 ++++-- .../google_genai/test_google_genai.py | 50 ++- .../huggingface_hub/test_huggingface_hub.py | 37 +- .../integrations/langchain/test_langchain.py | 52 ++- tests/integrations/litellm/test_litellm.py | 36 +- tests/integrations/openai/test_openai.py | 366 +++++++++++++++--- .../openai_agents/test_openai_agents.py | 274 +++++++++++++ .../pydantic_ai/test_pydantic_ai.py | 52 ++- 8 files changed, 891 insertions(+), 96 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 4255a0e6fc..d6b2c269d9 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -110,10 +110,14 @@ def test_nonstreaming_create_message( client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -144,10 +148,16 @@ def test_nonstreaming_create_message( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." ) @@ -245,10 +255,14 @@ async def test_nonstreaming_create_message_async( client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -279,10 +293,16 @@ async def test_nonstreaming_create_message_async( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
) @@ -413,10 +433,14 @@ def test_streaming_create_message( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -449,10 +473,16 @@ def test_streaming_create_message( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" ) @@ -895,10 +925,14 @@ def test_stream_messages( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -931,10 +965,16 @@ def test_stream_messages( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
) @@ -1390,10 +1430,14 @@ async def test_streaming_create_message_async( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -1425,10 +1469,16 @@ async def test_streaming_create_message_async( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" ) @@ -1883,10 +1933,14 @@ async def test_stream_message_async( ) messages = [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Hello, Claude", - } + }, ] if stream_gen_ai_spans: @@ -1919,10 +1973,16 @@ async def test_stream_message_async( assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert ( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - == '[{"role": "user", "content": "Hello, Claude"}]' - ) + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "Hello, Claude", + }, + ] assert ( span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index ff0b59178b..723a71959d 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -153,7 +153,12 @@ def test_nonstreaming_generate_content( ), start_transaction(name="google_genai"): config = create_test_config(temperature=0.7, max_output_tokens=100) mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents="Tell me a joke", config=config + model="gemini-1.5-flash", + contents=[ + "Message demonstrating the absence of truncation.", + "Tell me a joke", + ], + config=config, ) (event,) = (item.payload for item in items if item.type == "transaction") @@ -173,6 +178,24 @@ def test_nonstreaming_generate_content( ) if send_default_pii and include_prompts: + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Tell me a joke", + }, + ], + } + ] + # Response text is stored as a JSON array response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -675,7 +698,12 @@ def test_streaming_generate_content( ), start_transaction(name="google_genai"): config = create_test_config() stream = mock_genai_client.models.generate_content_stream( - model="gemini-1.5-flash", contents="Stream me a response", config=config + model="gemini-1.5-flash", + contents=[ + "Message demonstrating the absence of truncation.", + "Stream me a response", + ], + config=config, ) # Consume the stream (this is what users do with the integration wrapper) @@ -693,6 +721,24 @@ def test_streaming_generate_content( assert len(spans) == 1 chat_span = next(item.payload for item in items if item.type == "span") + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) 
== [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Stream me a response", + }, + ], + } + ] + # Check that streaming flag is set on both spans assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 4772eb368f..85ad55a47c 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from unittest import mock +from sentry_sdk.utils import safe_serialize import pytest import responses from huggingface_hub import InferenceClient @@ -761,7 +762,13 @@ def test_chat_completion( with sentry_sdk.start_transaction(name="test"): client.chat_completion( - messages=[{"role": "user", "content": "Hello!"}], + messages=[ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ], stream=False, ) @@ -804,8 +811,14 @@ def test_chat_completion( } if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' + expected_data["gen_ai.request.messages"] = safe_serialize( + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] ) expected_data["gen_ai.response.text"] = ( "[mocked] Hello! How can I help you today?" 
@@ -899,7 +912,13 @@ def test_chat_completion_streaming( with sentry_sdk.start_transaction(name="test"): _ = list( client.chat_completion( - [{"role": "user", "content": "Hello!"}], + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ], stream=True, ) ) @@ -945,8 +964,14 @@ def test_chat_completion_streaming( expected_data["gen_ai.usage.total_tokens"] = 197 if send_default_pii and include_prompts: - expected_data["gen_ai.request.messages"] = ( - '[{"role": "user", "content": "Hello!"}]' + expected_data["gen_ai.request.messages"] = safe_serialize( + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] ) expected_data["gen_ai.response.text"] = "the mocked model response" diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 2c0f8af977..3a2ef76a5a 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -577,6 +577,9 @@ def test_langchain_create_agent( agent.invoke( { "messages": [ + HumanMessage( + content="Message demonstrating the absence of truncation." + ), HumanMessage(content="How many letters in the word eudca"), ], }, @@ -606,6 +609,19 @@ def test_langchain_create_agent( == "Hello, how can I help you?" 
) + assert json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + { + "role": "user", + "content": "How many letters in the word eudca", + }, + ] + param_id = request.node.callspec.id if "string" in param_id: assert [ @@ -1343,7 +1359,16 @@ def test_langchain_openai_tools_agent( "send", side_effect=[tool_response, final_response], ) as _, start_transaction(): - list(agent_executor.stream({"input": "How many letters in the word eudca"})) + list( + agent_executor.stream( + { + "input": [ + "Message demonstrating the absence of truncation.", + "How many letters in the word eudca", + ] + } + ) + ) tx = next(item.payload for item in items if item.type == "transaction") assert tx["type"] == "transaction" @@ -1389,6 +1414,15 @@ def test_langchain_openai_tools_agent( assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": "['Message demonstrating the absence of truncation.', 'How many letters in the word eudca']", + } + ] + param_id = request.node.callspec.id if "string" in param_id: assert [ @@ -2011,7 +2045,12 @@ def test_langchain_openai_tools_agent_stream( ) as _, start_transaction(): list( agent_executor.stream( - {"input": "How many letters in the word eudca"}, + { + "input": [ + "Message demonstrating the absence of truncation.", + "How many letters in the word eudca", + ] + }, {"run_name": "my-snazzy-pipeline"}, ) ) @@ -2065,6 +2104,15 @@ def test_langchain_openai_tools_agent_stream( assert "word" in tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_INPUT] assert 5 == int(tool_exec_span["attributes"][SPANDATA.GEN_AI_TOOL_OUTPUT]) + assert json.loads( + chat_spans[0]["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { 
+ "role": "user", + "content": "['Message demonstrating the absence of truncation.', 'How many letters in the word eudca']", + } + ] + param_id = request.node.callspec.id if "string" in param_id: assert [ diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index aab289b28f..703ae67b1a 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -159,7 +159,10 @@ def test_nonstreaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = OpenAI(api_key="test-key") @@ -216,7 +219,13 @@ def test_nonstreaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] @@ -302,7 +311,10 @@ async def test_async_nonstreaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = AsyncOpenAI(api_key="test-key") @@ -360,7 +372,13 @@ async def test_async_nonstreaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + assert 
json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) == [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "Hello!"}, + ] assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["attributes"] else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] @@ -448,7 +466,10 @@ def test_streaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = OpenAI(api_key="test-key") @@ -556,7 +577,10 @@ async def test_async_streaming_chat_completion( stream_gen_ai_spans=stream_gen_ai_spans, ) - messages = [{"role": "user", "content": "Hello!"}] + messages = [ + {"role": "user", "content": "Message demonstrating the absence of truncation."}, + {"role": "user", "content": "Hello!"}, + ] client = AsyncOpenAI(api_key="test-key") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c9e734da69..5bc9e35b22 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -239,6 +239,10 @@ def test_nonstreaming_chat_completion_no_prompts( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -252,6 +256,10 @@ def test_nonstreaming_chat_completion_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -266,6 +274,10 @@ def test_nonstreaming_chat_completion_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence 
of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -360,6 +372,10 @@ def test_nonstreaming_chat_completion( ] assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert ( + "Message demonstrating the absence of truncation." + in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 @@ -558,6 +574,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -571,6 +591,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -585,6 +609,10 @@ async def test_nonstreaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -676,6 +704,10 @@ async def test_nonstreaming_chat_completion_async( ] assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert ( + "Message demonstrating the absence of truncation." 
+ in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 @@ -1353,6 +1385,10 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -1366,6 +1402,10 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -1380,6 +1420,10 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -1517,6 +1561,10 @@ def test_streaming_chat_completion( assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert ( + "Message demonstrating the absence of truncation." 
+ in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -1525,12 +1573,12 @@ def test_streaming_chat_completion( if "blocks" in param_id: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 17 else: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1600,12 +1648,12 @@ def test_streaming_chat_completion( if "blocks" in param_id: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["data"]["gen_ai.usage.input_tokens"] == 15 + assert span["data"]["gen_ai.usage.total_tokens"] == 17 else: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1819,6 +1867,10 @@ async def test_streaming_chat_completion_async_no_prompts( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message 
demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks", @@ -1832,6 +1884,10 @@ async def test_streaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts", @@ -1846,6 +1902,10 @@ async def test_streaming_chat_completion_async_no_prompts( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ] ), @@ -1976,6 +2036,10 @@ async def test_streaming_chat_completion_async( } ] + assert ( + "Message demonstrating the absence of truncation." + in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -1984,12 +2048,12 @@ async def test_streaming_chat_completion_async( if "blocks" in param_id: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 17 else: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -2007,6 +2071,10 @@ async def test_streaming_chat_completion_async( }, ] + assert ( + "Message demonstrating the absence of truncation." 
+ in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] @@ -2015,12 +2083,12 @@ async def test_streaming_chat_completion_async( if "blocks" in param_id: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 17 else: assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -2092,12 +2160,12 @@ async def test_streaming_chat_completion_async( if "blocks" in param_id: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["data"]["gen_ai.usage.input_tokens"] == 15 + assert span["data"]["gen_ai.usage.total_tokens"] == 17 else: assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.total_tokens"] == 22 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -3736,6 +3804,10 @@ def test_ai_client_span_responses_api_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + 
"content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="blocks_no_type", @@ -3747,6 +3819,11 @@ def test_ai_client_span_responses_api_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="blocks", @@ -3760,6 +3837,10 @@ def test_ai_client_span_responses_api_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts_no_type", @@ -3774,6 +3855,11 @@ def test_ai_client_span_responses_api_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="parts", @@ -3880,7 +3966,13 @@ def test_ai_client_span_responses_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3897,7 +3989,13 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3910,7 +4008,14 @@ def test_ai_client_span_responses_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message 
demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -3927,7 +4032,14 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -3943,7 +4055,13 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3961,7 +4079,13 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -3975,7 +4099,14 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -3993,7 +4124,14 @@ def test_ai_client_span_responses_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4348,6 +4486,10 @@ def test_error_in_responses_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, 
{"role": "user", "content": "hello"}, ], id="blocks_no_type", @@ -4359,6 +4501,11 @@ def test_error_in_responses_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="blocks", @@ -4372,6 +4519,10 @@ def test_error_in_responses_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts_no_type", @@ -4386,6 +4537,11 @@ def test_error_in_responses_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="parts", @@ -4492,7 +4648,13 @@ async def test_ai_client_span_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4509,7 +4671,13 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4522,7 +4690,14 @@ async def test_ai_client_span_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", 
"role": "user", "content": "hello"}, + ] ), } ) @@ -4539,7 +4714,14 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4555,7 +4737,13 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4573,7 +4761,13 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -4587,7 +4781,14 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4605,7 +4806,14 @@ async def test_ai_client_span_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -4830,6 +5038,10 @@ async def test_ai_client_span_responses_async_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", 
+ }, {"role": "user", "content": "hello"}, ], id="blocks_no_type", @@ -4841,6 +5053,11 @@ async def test_ai_client_span_responses_async_api( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="blocks", @@ -4854,6 +5071,10 @@ async def test_ai_client_span_responses_async_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"role": "user", "content": "hello"}, ], id="parts_no_type", @@ -4868,6 +5089,11 @@ async def test_ai_client_span_responses_async_api( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, {"type": "message", "role": "user", "content": "hello"}, ], id="parts", @@ -4992,7 +5218,13 @@ async def test_ai_client_span_streaming_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5009,7 +5241,13 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5022,7 +5260,14 @@ async def test_ai_client_span_streaming_responses_async_api( [{"type": "text", "content": "You are a helpful assistant."}] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + 
"content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -5039,7 +5284,14 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -5055,7 +5307,13 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5073,7 +5331,13 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"role": "user", "content": "hello"}] + [ + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"role": "user", "content": "hello"}, + ] ), } ) @@ -5087,7 +5351,14 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) @@ -5105,7 +5376,14 @@ async def test_ai_client_span_streaming_responses_async_api( ] ), "gen_ai.request.messages": safe_serialize( - [{"type": "message", "role": "user", "content": "hello"}] + [ + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ] ), } ) diff --git a/tests/integrations/openai_agents/test_openai_agents.py 
b/tests/integrations/openai_agents/test_openai_agents.py index beb44471de..5589352e5c 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -313,6 +313,10 @@ async def test_agent_invocation_span_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -327,6 +331,10 @@ async def test_agent_invocation_span_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -344,6 +352,10 @@ async def test_agent_invocation_span_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -361,6 +373,10 @@ async def test_agent_invocation_span_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -462,6 +478,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -474,6 +505,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) 
== [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -482,6 +528,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -494,6 +555,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id and instructions is None: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -503,6 +579,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -516,6 +607,21 @@ async def 
test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -525,6 +631,21 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -539,6 +660,21 @@ async def test_agent_invocation_span( ] ) + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + assert ( invoke_agent_span["attributes"]["gen_ai.response.text"] == "Hello, how can I help you?" 
@@ -960,6 +1096,10 @@ def test_agent_invocation_span_sync_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -974,6 +1114,11 @@ def test_agent_invocation_span_sync_no_pii( "role": "system", "content": "You are a helpful assistant.", }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -991,6 +1136,10 @@ def test_agent_invocation_span_sync_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "role": "user", "content": "Test input", @@ -1008,6 +1157,11 @@ def test_agent_invocation_span_sync_no_pii( {"type": "text", "text": "Be concise and clear."}, ], }, + { + "type": "message", + "role": "user", + "content": "Message demonstrating the absence of truncation.", + }, { "type": "message", "role": "user", @@ -1114,6 +1268,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1126,6 +1295,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": 
[{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id and instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1134,6 +1318,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1146,6 +1345,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id and instructions is None: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1155,6 +1369,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1168,6 +1397,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] 
+ ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif instructions is None: # type: ignore assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1177,6 +1421,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" @@ -1190,6 +1449,21 @@ def test_agent_invocation_span_sync( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: with patch.object( agent.model._client._client, diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 42a666644e..bcfb9f1df8 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -76,7 +76,9 @@ async def test_agent_run_async( if stream_gen_ai_spans: items = capture_items("transaction", "span") - result = await test_agent.run("Test input") + result = await test_agent.run( + ["Message demonstrating the absence of truncation.", "Test input"] + ) assert result is not None assert result.output is not None @@ -102,7 +104,23 @@ async def test_agent_run_async( assert "chat" in chat_span["name"] 
assert chat_span["attributes"]["gen_ai.operation.name"] == "chat" assert chat_span["attributes"]["gen_ai.response.streaming"] is False - assert "gen_ai.request.messages" in chat_span["attributes"] + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Test input", + }, + ], + } + ] assert "gen_ai.usage.input_tokens" in chat_span["attributes"] assert "gen_ai.usage.output_tokens" in chat_span["attributes"] else: @@ -275,7 +293,9 @@ def test_agent_run_sync( if stream_gen_ai_spans: items = capture_items("transaction", "span") - result = test_agent.run_sync("Test input") + result = test_agent.run_sync( + ["Message demonstrating the absence of truncation.", "Test input"] + ) assert result is not None assert result.output is not None @@ -394,7 +414,9 @@ async def test_agent_run_stream( if stream_gen_ai_spans: items = capture_items("transaction", "span") - async with test_agent.run_stream("Test input") as result: + async with test_agent.run_stream( + ["Message demonstrating the absence of truncation.", "Test input"] + ) as result: # Consume the stream async for _ in result.stream_output(): pass @@ -416,7 +438,23 @@ async def test_agent_run_stream( # Verify streaming flag is True for streaming for chat_span in chat_spans: assert chat_span["attributes"]["gen_ai.response.streaming"] is True - assert "gen_ai.request.messages" in chat_span["attributes"] + assert json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + }, + { + "type": "text", + "text": "Test input", + }, + ], + } + ] assert "gen_ai.usage.input_tokens" in chat_span["attributes"] # Streaming responses should still have output data assert ( @@ -479,7 +517,9 @@ async def 
test_agent_run_stream_events( if stream_gen_ai_spans: items = capture_items("transaction", "span") - async for _ in test_agent.run_stream_events("Test input"): + async for _ in test_agent.run_stream_events( + ["Message demonstrating the absence of truncation.", "Test input"] + ): pass # Verify transaction From 425ae279029edef423e2f1d466487250002da497 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 13:39:18 +0200 Subject: [PATCH 82/84] cleanup one openai test --- tests/integrations/openai/test_openai.py | 46 ++++++++---------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 6c113078a3..934a0b8f4e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1975,24 +1975,6 @@ async def test_streaming_chat_completion_async( "content": "You are a helpful assistant.", } ] - - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import - - if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 - - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: assert json.loads( span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] @@ -2007,23 +1989,23 @@ async def test_streaming_chat_completion_async( }, ] - assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - try: - import tiktoken # type: ignore # noqa # pylint: disable=unused-import + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import - if "blocks" in param_id: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 - else: - assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 - assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 - assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 + if "blocks" in param_id: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 + else: + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 - except ImportError: - pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly else: events = capture_events() From 47680da69e4481059c00c7707fe9172169144264 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 13:43:56 +0200 Subject: [PATCH 83/84] add message in openai_agents tests --- tests/integrations/openai_agents/test_openai_agents.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 5589352e5c..430d60ac01 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ 
b/tests/integrations/openai_agents/test_openai_agents.py @@ -332,6 +332,7 @@ async def test_agent_invocation_span_no_pii( "content": "You are a helpful assistant.", }, { + "type": "message", "role": "user", "content": "Message demonstrating the absence of truncation.", }, @@ -374,6 +375,7 @@ async def test_agent_invocation_span_no_pii( ], }, { + "type": "message", "role": "user", "content": "Message demonstrating the absence of truncation.", }, From 7f01f968e5d3af6bdf474265a6a15000f94acb17 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 13 May 2026 14:54:38 +0200 Subject: [PATCH 84/84] merge follow up --- .../integrations/anthropic/test_anthropic.py | 325 ++++-------------- .../google_genai/test_google_genai.py | 166 ++------- .../integrations/langchain/test_langchain.py | 137 +++----- .../integrations/langgraph/test_langgraph.py | 86 ++--- tests/integrations/litellm/test_litellm.py | 103 ++---- 5 files changed, 201 insertions(+), 616 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 0c5d110827..d6b2c269d9 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -3685,20 +3685,14 @@ def mock_messages_create(*args, **kwargs): assert stored_messages[0]["role"] == expected_role -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_anthropic_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_anthropic_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3714,82 +3708,43 @@ def 
test_anthropic_message_truncation( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] 
+ assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio -async def test_anthropic_message_truncation_async( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +async def test_anthropic_message_truncation_async(sentry_init, capture_events): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -3805,68 +3760,30 @@ async def test_anthropic_message_truncation_async( {"role": "user", "content": "small message 5"}, ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") 
== OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - - assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + with start_transaction(): + await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert isinstance(messages_data, str) + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - await client.messages.create( - max_tokens=1024, messages=messages, model="model" - ) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert 
isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -5260,21 +5177,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_image( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_image(sentry_init, capture_events): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5295,31 +5205,15 @@ def test_message_with_base64_image( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) 
== 1 - (event,) = events - (span,) = event["spans"] + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -5469,21 +5363,14 @@ def test_message_with_file_image( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_base64_pdf( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_base64_pdf(sentry_init, capture_events): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5504,30 +5391,14 @@ def test_message_with_base64_pdf( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -5672,21 +5543,14 @@ def test_message_with_file_document( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_mixed_content( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_mixed_content(sentry_init, capture_events): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5723,30 +5587,14 @@ def test_message_with_mixed_content( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = 
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -5778,21 +5626,14 @@ def test_message_with_mixed_content( } -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_message_with_multiple_images_different_formats( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_message_with_multiple_images_different_formats(sentry_init, capture_events): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) - + events = capture_events() client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -5828,30 +5669,14 @@ def test_message_with_multiple_images_different_formats( } ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads( - span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] - - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 diff --git 
a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 69287afb61..723a71959d 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1436,21 +1436,16 @@ def test_tool_calls_extraction( assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_google_genai_message_truncation( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -1459,39 +1454,21 @@ def test_google_genai_message_truncation( mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents=[large_content, small_content], - config=create_test_config(), - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", 
return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=[large_content, small_content], config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + (event,) = events + invoke_span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2574,21 +2551,16 @@ def test_generate_content_with_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_function_response( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2614,36 +2586,18 @@ def test_generate_content_with_function_response( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = 
next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -2652,21 +2606,16 @@ def test_generate_content_with_function_response( assert messages[0]["content"]["output"] == "Sunny, 72F" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_mixed_string_and_content( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2683,36 +2632,18 @@ def test_generate_content_with_mixed_string_and_content( ), ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with 
start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -2775,13 +2706,8 @@ def test_generate_content_with_part_object_directly( assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_generate_content_with_list_of_dicts( - sentry_init, - capture_events, - capture_items, - mock_genai_client, - stream_gen_ai_spans, + sentry_init, capture_events, mock_genai_client ): """ Test generate_content with list of dict format inputs. 
@@ -2794,8 +2720,8 @@ def test_generate_content_with_list_of_dicts( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -2806,36 +2732,18 @@ def test_generate_content_with_list_of_dicts( {"role": "user", "parts": [{"text": "Second user message"}]}, ] - if stream_gen_ai_spans: - items = capture_items("span") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads( - invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) - else: - events = capture_events() - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ), start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - (event,) = events - invoke_span = event["spans"][0] - - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + (event,) = events + invoke_span = event["spans"][0] + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7a39f74ffc..3a2ef76a5a 100644 --- 
a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -2991,13 +2991,7 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langchain_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langchain_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langchain integration.""" from langchain_core.outputs import LLMResult, Generation @@ -3005,8 +2999,8 @@ def test_langchain_message_truncation( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) @@ -3024,101 +3018,48 @@ def test_langchain_message_truncation( "small message 5", ] - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - - assert 
llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - else: - events = capture_events() - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == "gen_ai.text_completion" - ] + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + name="my_pipeline", + invocation_params={ + "temperature": 0.7, + "max_tokens": 100, + "model": "gpt-3.5-turbo", + }, + ) - assert len(llm_spans) > 0 + response = LLMResult( + generations=[[Generation(text="The response")]], + llm_output={ + "token_usage": { + "total_tokens": 25, + "prompt_tokens": 10, + "completion_tokens": 15, + } + }, + ) + callback.on_llm_end(response=response, run_id=run_id) - llm_span = llm_spans[0] + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + llm_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.text_completion" + ] + assert len(llm_spans) > 0 - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] - messages_data = 
llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + llm_span = llm_spans[0] + assert llm_span["data"]["gen_ai.operation.name"] == "text_completion" + assert llm_span["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my_pipeline" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["data"] + messages_data = llm_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) diff --git a/tests/integrations/langgraph/test_langgraph.py b/tests/integrations/langgraph/test_langgraph.py index 68d592bd1d..b8554f2f60 100644 --- a/tests/integrations/langgraph/test_langgraph.py +++ b/tests/integrations/langgraph/test_langgraph.py @@ -1988,13 +1988,7 @@ def __init__(self, content, message_type="human"): assert "ai" not in roles -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_langgraph_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_langgraph_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in Langgraph integration.""" import json @@ -2002,8 +1996,8 @@ def test_langgraph_message_truncation( integrations=[LanggraphIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2023,66 +2017,28 @@ def test_langgraph_message_truncation( def original_invoke(self, *args, **kwargs): return {"messages": args[0].get("messages", [])} - if stream_gen_ai_spans: - items = capture_items("transaction", "span") + with start_transaction(): + wrapped_invoke = _wrap_pregel_invoke(original_invoke) + result = wrapped_invoke(pregel, test_state) - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - spans = [item.payload for item in items if item.type == "span"] - invoke_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 - - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - (tx,) = (item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(): - wrapped_invoke = _wrap_pregel_invoke(original_invoke) - result = wrapped_invoke(pregel, test_state) - - assert result is not None - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - invoke_spans = [ - span - for span in tx.get("spans", []) - if span.get("op") == OP.GEN_AI_INVOKE_AGENT - ] - - assert len(invoke_spans) > 0 + assert result is not None + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - invoke_span = invoke_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_INVOKE_AGENT + ] + assert len(invoke_spans) > 0 - 
messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + invoke_span = invoke_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 76aea9093d..703ae67b1a 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2349,20 +2349,14 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -def test_litellm_message_truncation( - sentry_init, - capture_events, - capture_items, - stream_gen_ai_spans, -): +def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, ) + events = capture_events() large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 @@ -2376,78 +2370,39 @@ def test_litellm_message_truncation( ] mock_response = MockCompletionResponse() - if stream_gen_ai_spans: - items = capture_items("transaction", "span") - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } - - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) - - spans = [item.payload for item in items if item.type == "span"] - chat_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 - - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - - messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - tx = next(item.payload for item in items if item.type == "transaction") - else: - events = capture_events() - - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - _input_callback(kwargs) - _success_callback( - kwargs, - mock_response, - datetime.now(), - datetime.now(), - ) + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" - chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT - ] - assert len(chat_spans) > 0 + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 
0 - chat_span = chat_spans[0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert "small message 5" in str(parsed_messages[0]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5