From f696766b0ecd779441847fa9fa2767e2a1e4cbb6 Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Thu, 30 Apr 2026 11:22:59 -0400 Subject: [PATCH 1/4] feat(wsgi,asgi): Introduce substitute values for filtered fields in span-streaming mode When span streaming is enabled, sensitive headers are replaced with a human-readable string ("[Filtered]") rather than an AnnotatedValue wrapper, which is incompatible with the span-first data model. Add substitute string constants (OVER_SIZE_LIMIT_SUBSTITUTE, UNPARSABLE_RAW_DATA_SUBSTITUTE) and new AnnotatedValue factory methods (substituted_because_raw_data, substituted_because_over_size_limit) that use remark type "s" (substituted) instead of "x" (removed). Update _filter_headers to auto-detect span streaming and choose the appropriate value type, removing the use_annotated_value parameter. Refs GH-2396 Fixes PY-2396 --- sentry_sdk/_types.py | 43 ++++++++++++++++++++++ sentry_sdk/integrations/_asgi_common.py | 2 +- sentry_sdk/integrations/_wsgi_common.py | 15 ++++---- sentry_sdk/integrations/wsgi.py | 2 +- tests/integrations/asgi/test_asgi.py | 45 +++++++++++++++++++++++ tests/integrations/wsgi/test_wsgi.py | 47 +++++++++++++++++++++++++ 6 files changed, 146 insertions(+), 8 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index baf5f6a2fd..e071ff07ff 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -13,6 +13,10 @@ SENSITIVE_DATA_SUBSTITUTE = "[Filtered]" BLOB_DATA_SUBSTITUTE = "[Blob substitute]" +OVER_SIZE_LIMIT_SUBSTITUTE = ( + "[Value removed due to size of field exceeding configured maximum size.]" +) +UNPARSABLE_RAW_DATA_SUBSTITUTE = "[Value removed due to being unparsable.]" class AnnotatedValue: @@ -47,6 +51,8 @@ def __len__(self: "AnnotatedValue") -> int: @classmethod def removed_because_raw_data(cls) -> "AnnotatedValue": """The value was removed because it could not be parsed. 
This is done for request body values that are not json nor a form.""" + # This is the legacy approach - we want to transition over to `substituted_because_raw_data` after we completely transition + # to span-first return AnnotatedValue( value="", metadata={ @@ -59,12 +65,29 @@ def removed_because_raw_data(cls) -> "AnnotatedValue": }, ) + @classmethod + def substituted_because_raw_data(cls) -> "AnnotatedValue": + """The value was replaced because it could not be parsed. This is done for request body values that are not json nor a form.""" + return AnnotatedValue( + value=UNPARSABLE_RAW_DATA_SUBSTITUTE, + metadata={ + "rem": [ # Remark + [ + "!raw", # Unparsable raw data + "s", # The field's original value was substituted + ] + ] + }, + ) + @classmethod def removed_because_over_size_limit(cls, value: "Any" = "") -> "AnnotatedValue": """ The actual value was removed because the size of the field exceeded the configured maximum size, for example specified with the max_request_body_size sdk option. """ + # This is the legacy approach - we want to transition over to `substituted_because_over_size_limit` after we completely transition + # to span-first return AnnotatedValue( value=value, metadata={ @@ -77,6 +100,26 @@ def removed_because_over_size_limit(cls, value: "Any" = "") -> "AnnotatedValue": }, ) + @classmethod + def substituted_because_over_size_limit( + cls, value: "Any" = OVER_SIZE_LIMIT_SUBSTITUTE + ) -> "AnnotatedValue": + """ + The actual value was replaced because the size of the field exceeded the configured maximum size, + for example specified with the max_request_body_size sdk option. 
+ """ + return AnnotatedValue( + value=value, + metadata={ + "rem": [ # Remark + [ + "!config", # Because of configured maximum size + "s", # The field's original value was substituted + ] + ] + }, + ) + @classmethod def substituted_because_contains_sensitive_data(cls) -> "AnnotatedValue": """The actual value was removed because it contained sensitive information.""" diff --git a/sentry_sdk/integrations/_asgi_common.py b/sentry_sdk/integrations/_asgi_common.py index 525ca4b5b5..456960ebf2 100644 --- a/sentry_sdk/integrations/_asgi_common.py +++ b/sentry_sdk/integrations/_asgi_common.py @@ -118,7 +118,7 @@ def _get_request_attributes(asgi_scope: "Any") -> "dict[str, Any]": if asgi_scope.get("method"): attributes["http.request.method"] = asgi_scope["method"].upper() - headers = _filter_headers(_get_headers(asgi_scope), use_annotated_value=False) + headers = _filter_headers(_get_headers(asgi_scope)) for header, value in headers.items(): attributes[f"http.request.header.{header.lower()}"] = value diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index bcac1eb2d4..22a719d407 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -6,6 +6,7 @@ from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import AnnotatedValue, logger +from sentry_sdk.tracing_utils import has_span_streaming_enabled try: from django.http.request import RawPostDataException @@ -213,16 +214,18 @@ def _is_json_content_type(ct: "Optional[str]") -> bool: def _filter_headers( headers: "Mapping[str, str]", - use_annotated_value: bool = True, ) -> "Mapping[str, Union[AnnotatedValue, str]]": if should_send_default_pii(): return headers - substitute: "Union[AnnotatedValue, str]" - if use_annotated_value: - substitute = AnnotatedValue.removed_because_over_size_limit() - else: - substitute = SENSITIVE_DATA_SUBSTITUTE + client_options = 
sentry_sdk.get_client().options + is_span_streaming_enabled = has_span_streaming_enabled(client_options) + + substitute: "Union[AnnotatedValue, str]" = ( + SENSITIVE_DATA_SUBSTITUTE + if is_span_streaming_enabled + else AnnotatedValue.removed_because_over_size_limit() + ) return { k: (v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS else substitute) diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index 8814a82858..82ac9dcfef 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py @@ -400,7 +400,7 @@ def _get_request_attributes( if method: attributes["http.request.method"] = method.upper() - headers = _filter_headers(dict(_get_headers(environ)), use_annotated_value=False) + headers = _filter_headers(dict(_get_headers(environ))) for header, value in headers.items(): attributes[f"http.request.header.{header.lower()}"] = value diff --git a/tests/integrations/asgi/test_asgi.py b/tests/integrations/asgi/test_asgi.py index 7f44c9d00a..2c30f2d2b2 100644 --- a/tests/integrations/asgi/test_asgi.py +++ b/tests/integrations/asgi/test_asgi.py @@ -1002,3 +1002,48 @@ async def test_custom_transaction_name( assert transaction_event["type"] == "transaction" assert transaction_event["transaction"] == "foobar" assert transaction_event["transaction_info"] == {"source": "custom"} + + +@pytest.mark.asyncio +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_filter_sensitive_headers_without_pii( + sentry_init, + asgi3_app, + capture_events, + capture_items, + span_streaming, +): + sentry_init( + send_default_pii=False, + traces_sample_rate=1.0, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, + ) + app = SentryAsgiMiddleware(asgi3_app) + + if span_streaming: + items = capture_items("span") + else: + events = capture_events() + + async with TestClient(app) as client: + await client.get( + "/", + headers={"Authorization": "Bearer secret", "X-Custom": "ok"}, + ) + + 
sentry_sdk.flush() + + if span_streaming: + assert len(items) == 1 + attributes = items[0].payload["attributes"] + assert attributes["http.request.header.authorization"] == "[Filtered]" + assert attributes["http.request.header.x-custom"] == "ok" + else: + (transaction_event,) = events + headers = transaction_event["request"]["headers"] + assert ( + headers["authorization"] != "Bearer secret" + ) # In the legacy approach, the expectation is that the event scrubber would remove this + assert headers["x-custom"] == "ok" diff --git a/tests/integrations/wsgi/test_wsgi.py b/tests/integrations/wsgi/test_wsgi.py index a95a1d63fa..b877132895 100644 --- a/tests/integrations/wsgi/test_wsgi.py +++ b/tests/integrations/wsgi/test_wsgi.py @@ -841,3 +841,50 @@ def app(environ, start_response): ) def test_get_request_url_x_forwarded_proto(environ, use_x_forwarded_for, expected_url): assert get_request_url(environ, use_x_forwarded_for) == expected_url + + +@pytest.mark.parametrize("span_streaming", [True, False]) +def test_filter_sensitive_headers_without_pii( + sentry_init, + capture_events, + capture_items, + span_streaming, +): + def app(environ, start_response): + start_response("200 OK", []) + return [b"ok"] + + sentry_init( + send_default_pii=False, + traces_sample_rate=1.0, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, + ) + middleware = SentryWsgiMiddleware(app) + client = Client(middleware) + + if span_streaming: + items = capture_items("span") + else: + events = capture_events() + + client.get( + "/", + headers={"Authorization": "Bearer secret", "X-Custom": "ok"}, + ) + + sentry_sdk.flush() + + if span_streaming: + assert len(items) == 1 + attributes = items[0].payload["attributes"] + assert attributes["http.request.header.authorization"] == "[Filtered]" + assert attributes["http.request.header.x-custom"] == "ok" + else: + envelope = events[0] + headers = envelope["request"]["headers"] + assert ( + headers["Authorization"] != "Bearer 
secret" + ) # In the legacy approach, the expectation is that the event scrubber would remove this + assert headers["X-Custom"] == "ok" From bd5a52fe7491c6b3913cbcf470aa9a492449334d Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Thu, 30 Apr 2026 11:51:22 -0400 Subject: [PATCH 2/4] Revert change to the behaviour of _filter_headers but retain the new AnnotatedValue methods --- sentry_sdk/integrations/_asgi_common.py | 4 ++- sentry_sdk/integrations/_wsgi_common.py | 8 ++--- sentry_sdk/integrations/wsgi.py | 2 +- tests/integrations/asgi/test_asgi.py | 45 ----------------------- tests/integrations/wsgi/test_wsgi.py | 47 ------------------------- 5 files changed, 6 insertions(+), 100 deletions(-) diff --git a/sentry_sdk/integrations/_asgi_common.py b/sentry_sdk/integrations/_asgi_common.py index 456960ebf2..91268e5ed3 100644 --- a/sentry_sdk/integrations/_asgi_common.py +++ b/sentry_sdk/integrations/_asgi_common.py @@ -93,7 +93,9 @@ def _get_request_data(asgi_scope: "Any") -> "Dict[str, Any]": if ty in ("http", "websocket"): request_data["method"] = asgi_scope.get("method") - request_data["headers"] = headers = _filter_headers(_get_headers(asgi_scope)) + request_data["headers"] = headers = _filter_headers( + _get_headers(asgi_scope), use_annotated_value=False + ) request_data["query_string"] = _get_query(asgi_scope) request_data["url"] = _get_url( diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 22a719d407..dd962bfc1c 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -6,7 +6,6 @@ from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import AnnotatedValue, logger -from sentry_sdk.tracing_utils import has_span_streaming_enabled try: from django.http.request import RawPostDataException @@ -213,17 +212,14 @@ def _is_json_content_type(ct: "Optional[str]") -> bool: def _filter_headers( - headers: 
"Mapping[str, str]", + headers: "Mapping[str, str]", use_annotated_value: True ) -> "Mapping[str, Union[AnnotatedValue, str]]": if should_send_default_pii(): return headers - client_options = sentry_sdk.get_client().options - is_span_streaming_enabled = has_span_streaming_enabled(client_options) - substitute: "Union[AnnotatedValue, str]" = ( SENSITIVE_DATA_SUBSTITUTE - if is_span_streaming_enabled + if not use_annotated_value else AnnotatedValue.removed_because_over_size_limit() ) diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index 82ac9dcfef..8814a82858 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py @@ -400,7 +400,7 @@ def _get_request_attributes( if method: attributes["http.request.method"] = method.upper() - headers = _filter_headers(dict(_get_headers(environ))) + headers = _filter_headers(dict(_get_headers(environ)), use_annotated_value=False) for header, value in headers.items(): attributes[f"http.request.header.{header.lower()}"] = value diff --git a/tests/integrations/asgi/test_asgi.py b/tests/integrations/asgi/test_asgi.py index 2c30f2d2b2..7f44c9d00a 100644 --- a/tests/integrations/asgi/test_asgi.py +++ b/tests/integrations/asgi/test_asgi.py @@ -1002,48 +1002,3 @@ async def test_custom_transaction_name( assert transaction_event["type"] == "transaction" assert transaction_event["transaction"] == "foobar" assert transaction_event["transaction_info"] == {"source": "custom"} - - -@pytest.mark.asyncio -@pytest.mark.parametrize("span_streaming", [True, False]) -async def test_filter_sensitive_headers_without_pii( - sentry_init, - asgi3_app, - capture_events, - capture_items, - span_streaming, -): - sentry_init( - send_default_pii=False, - traces_sample_rate=1.0, - _experiments={ - "trace_lifecycle": "stream" if span_streaming else "static", - }, - ) - app = SentryAsgiMiddleware(asgi3_app) - - if span_streaming: - items = capture_items("span") - else: - events = capture_events() - - async with 
TestClient(app) as client: - await client.get( - "/", - headers={"Authorization": "Bearer secret", "X-Custom": "ok"}, - ) - - sentry_sdk.flush() - - if span_streaming: - assert len(items) == 1 - attributes = items[0].payload["attributes"] - assert attributes["http.request.header.authorization"] == "[Filtered]" - assert attributes["http.request.header.x-custom"] == "ok" - else: - (transaction_event,) = events - headers = transaction_event["request"]["headers"] - assert ( - headers["authorization"] != "Bearer secret" - ) # In the legacy approach, the expectation is that the event scrubber would remove this - assert headers["x-custom"] == "ok" diff --git a/tests/integrations/wsgi/test_wsgi.py b/tests/integrations/wsgi/test_wsgi.py index b877132895..a95a1d63fa 100644 --- a/tests/integrations/wsgi/test_wsgi.py +++ b/tests/integrations/wsgi/test_wsgi.py @@ -841,50 +841,3 @@ def app(environ, start_response): ) def test_get_request_url_x_forwarded_proto(environ, use_x_forwarded_for, expected_url): assert get_request_url(environ, use_x_forwarded_for) == expected_url - - -@pytest.mark.parametrize("span_streaming", [True, False]) -def test_filter_sensitive_headers_without_pii( - sentry_init, - capture_events, - capture_items, - span_streaming, -): - def app(environ, start_response): - start_response("200 OK", []) - return [b"ok"] - - sentry_init( - send_default_pii=False, - traces_sample_rate=1.0, - _experiments={ - "trace_lifecycle": "stream" if span_streaming else "static", - }, - ) - middleware = SentryWsgiMiddleware(app) - client = Client(middleware) - - if span_streaming: - items = capture_items("span") - else: - events = capture_events() - - client.get( - "/", - headers={"Authorization": "Bearer secret", "X-Custom": "ok"}, - ) - - sentry_sdk.flush() - - if span_streaming: - assert len(items) == 1 - attributes = items[0].payload["attributes"] - assert attributes["http.request.header.authorization"] == "[Filtered]" - assert attributes["http.request.header.x-custom"] == 
"ok" - else: - envelope = events[0] - headers = envelope["request"]["headers"] - assert ( - headers["Authorization"] != "Bearer secret" - ) # In the legacy approach, the expectation is that the event scrubber would remove this - assert headers["X-Custom"] == "ok" From a90772e95770f98796eb614e76ebca5ca70a5b73 Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Thu, 30 Apr 2026 11:55:36 -0400 Subject: [PATCH 3/4] whoops --- sentry_sdk/integrations/_wsgi_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index dd962bfc1c..4208955620 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -212,7 +212,8 @@ def _is_json_content_type(ct: "Optional[str]") -> bool: def _filter_headers( - headers: "Mapping[str, str]", use_annotated_value: True + headers: "Mapping[str, str]", + use_annotated_value: bool = True, ) -> "Mapping[str, Union[AnnotatedValue, str]]": if should_send_default_pii(): return headers From 06e2cefdec3afaf3eedd55dff80a5d0d4c78cadc Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Thu, 30 Apr 2026 12:00:25 -0400 Subject: [PATCH 4/4] . 
--- sentry_sdk/integrations/_asgi_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/_asgi_common.py b/sentry_sdk/integrations/_asgi_common.py index 91268e5ed3..82edc6ce99 100644 --- a/sentry_sdk/integrations/_asgi_common.py +++ b/sentry_sdk/integrations/_asgi_common.py @@ -94,7 +94,7 @@ def _get_request_data(asgi_scope: "Any") -> "Dict[str, Any]": request_data["method"] = asgi_scope.get("method") request_data["headers"] = headers = _filter_headers( - _get_headers(asgi_scope), use_annotated_value=False + _get_headers(asgi_scope), ) request_data["query_string"] = _get_query(asgi_scope) @@ -120,7 +120,7 @@ def _get_request_attributes(asgi_scope: "Any") -> "dict[str, Any]": if asgi_scope.get("method"): attributes["http.request.method"] = asgi_scope["method"].upper() - headers = _filter_headers(_get_headers(asgi_scope)) + headers = _filter_headers(_get_headers(asgi_scope), use_annotated_value=False) for header, value in headers.items(): attributes[f"http.request.header.{header.lower()}"] = value