From 1f32952d0066a9dc1ff1482cef48c3cbe0acb663 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 10:45:45 +0100 Subject: [PATCH 01/15] fix(ai): redact message parts content of type blob --- sentry_sdk/ai/utils.py | 51 +++++++++++++++++ tests/test_ai_monitoring.py | 106 +++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1d2b4483c9..73155b0305 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -5,6 +5,8 @@ from sys import getsizeof from typing import TYPE_CHECKING +from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE + if TYPE_CHECKING: from typing import Any, Callable, Dict, List, Optional, Tuple @@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 +def redact_blob_message_parts(messages): + # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] + """ + Redact blob message parts from the messages, by removing the "content" key. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "[Filtered]" + } + ] + } + """ + + for message in messages: + content = message.get("content") + if isinstance(content, list): + for item in content: + if item.get("type") == "blob": + item["content"] = SENSITIVE_DATA_SUBSTITUTE + return messages + + def truncate_messages_by_size( messages: "List[Dict[str, Any]]", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, @@ -186,6 +235,8 @@ def truncate_and_annotate_messages( if not messages: return None + messages = redact_blob_message_parts(messages) + truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) if removed_count > 0: scope._gen_ai_original_message_count[span.span_id] = len(messages) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 8d3d4ba204..e9f3712cd3 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -4,7 +4,7 @@ import pytest import sentry_sdk -from sentry_sdk._types import AnnotatedValue +from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( MAX_GEN_AI_MESSAGE_BYTES, @@ -13,6 +13,7 @@ truncate_and_annotate_messages, truncate_messages_by_size, _find_truncation_index, + redact_blob_message_parts, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -542,3 +543,106 @@ def __init__(self): assert isinstance(messages_value, AnnotatedValue) assert messages_value.metadata["len"] == stored_original_length assert len(messages_value.value) == len(truncated_messages) + + +class TestRedactBlobMessageParts: + def test_redacts_single_blob_content(self): + """Test that blob content is redacted in a message with single blob part""" + messages = [ + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages # Returns the same list + assert ( + messages[0]["content"][0]["text"] + == "How many ponies do you see in the image?" + ) + assert messages[0]["content"][0]["type"] == "text" + assert messages[0]["content"][1]["type"] == "blob" + assert messages[0]["content"][1]["modality"] == "image" + assert messages[0]["content"][1]["mime_type"] == "image/jpeg" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_multiple_blob_parts(self): + """Test that multiple blob parts in a single message are all redacted""" + messages = [ + { + "role": "user", + "content": [ + {"text": "Compare these images", "type": "text"}, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "_image", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "_image", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][0]["text"] == "Compare these images" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_blobs_in_multiple_messages(self): + """Test that blob parts are redacted across multiple messages""" + messages = [ + { + "role": "user", + "content": [ + {"text": "First message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "", + }, + ], + }, + { + "role": "assistant", + "content": "I see the image.", + }, + { + "role": "user", + "content": [ + {"text": "Second message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "", + }, + ], + }, + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[1]["content"] == "I see the image." # Unchanged + assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE From 795bcea241f7777e646a4da14c870a3049bdbe90 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:05:04 +0100 Subject: [PATCH 02/15] fix(ai): skip non dict messages --- sentry_sdk/ai/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 73155b0305..ae507e898b 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -182,6 +182,9 @@ def redact_blob_message_parts(messages): """ for message in messages: + if not isinstance(message, dict): + continue + content = message.get("content") if isinstance(content, list): for item in content: From a623e137d26e982c0d85258256c0ba013f9ecb24 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:21:43 +0100 Subject: [PATCH 03/15] fix(ai): typing --- sentry_sdk/ai/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index ae507e898b..1b61c7a113 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -143,8 +143,9 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 -def redact_blob_message_parts(messages): - # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] +def redact_blob_message_parts( + messages: "List[Dict[str, Any]]", +) -> "List[Dict[str, Any]]": """ Redact blob message parts from the messages, by removing the "content" key. e.g: From 3d3ce5bbdca43f14194edbbbee11d3b6dcd6d8a3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:37:12 +0100 Subject: [PATCH 04/15] fix(ai): content items may not be dicts --- sentry_sdk/ai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1b61c7a113..78a64ab737 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -189,7 +189,7 @@ def redact_blob_message_parts( content = message.get("content") if isinstance(content, list): for item in content: - if item.get("type") == "blob": + if isinstance(item, dict) and item.get("type") == "blob": item["content"] = SENSITIVE_DATA_SUBSTITUTE return messages From 433bc885b682b76c8395e97bf736f8ba2d287e62 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 5 Jan 2026 12:12:27 +0100 Subject: [PATCH 05/15] fix(integrations): google-genai: reworked `gen_ai.request.messages` extraction from parameters --- sentry_sdk/integrations/google_genai/utils.py | 360 ++++++++++++++++-- .../google_genai/test_google_genai.py | 323 ++++++++++++++++ 2 files changed, 652 insertions(+), 31 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 03423c385a..ac3870a888 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -1,3 +1,4 @@ +import base64 import copy import inspect from functools import wraps @@ -12,6 +13,7 @@ Optional, Union, TypedDict, + Dict, ) import sentry_sdk @@ -19,6 +21,7 @@ set_data_normalized, truncate_and_annotate_messages, normalize_message_roles, + redact_blob_message_parts, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii @@ -145,44 +148,323 @@ def get_model_name(model: "Union[str, Model]") -> str: return str(model) -def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]": - """Extract text from contents parameter which can have various formats.""" +def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, Any]]": + """Extract messages from contents parameter which can have various formats. + + Returns a list of message dictionaries in the format: + - System: {"role": "system", "content": "string"} + - User/Assistant: {"role": "user"|"assistant", "content": [{"text": "...", "type": "text"}, ...]} + """ if contents is None: - return None + return [] - # Simple string case + messages = [] + + # Handle string case if isinstance(contents, str): - return contents + return [{"role": "user", "content": contents}] - # List of contents or parts + # Handle list case - process each item (non-recursive, flatten at top level) if isinstance(contents, list): - texts = [] for item in contents: - # Recursively extract text from each item - extracted = extract_contents_text(item) - if extracted: - texts.append(extracted) - return " ".join(texts) if texts else None + item_messages = extract_contents_messages(item) + messages.extend(item_messages) + return messages - # Dictionary case + # Handle dictionary case (ContentDict) if isinstance(contents, dict): - if "text" in contents: - return contents["text"] - # Try to extract from parts if present in dict - if "parts" in contents: - return extract_contents_text(contents["parts"]) + role = contents.get("role", "user") + parts = contents.get("parts") + + if parts: + content_parts = [] + tool_messages = [] + + for part in parts: + part_result = _extract_part_content(part) + if part_result is None: + continue + + if isinstance(part_result, dict) and part_result.get("role") == "tool": + # Tool message - add separately + tool_messages.append(part_result) + else: + # Regular content part + content_parts.append(part_result) + + # Add main message if we have content parts + if content_parts: + # Normalize role: "model" -> "assistant" + normalized_role = "assistant" if role == "model" else role or "user" + messages.append({"role": normalized_role, "content": content_parts}) + + # Add tool messages + messages.extend(tool_messages) + elif "text" in contents: + # Simple text in dict + messages.append( + { + "role": role or "user", + "content": [{"text": contents["text"], "type": "text"}], + } + ) + + return messages + + # Handle Content object + if hasattr(contents, "parts") and contents.parts: + role = getattr(contents, "role", None) or "user" + content_parts = [] + tool_messages = [] + + for part in contents.parts: + part_result = _extract_part_content(part) + if part_result is None: + continue + + if isinstance(part_result, dict) and part_result.get("role") == "tool": + tool_messages.append(part_result) + else: + content_parts.append(part_result) - # Content object with parts - recurse into parts - if getattr(contents, "parts", None): - return extract_contents_text(contents.parts) + if content_parts: + normalized_role = "assistant" if role == "model" else role + messages.append({"role": normalized_role, "content": content_parts}) - # Direct text attribute - if hasattr(contents, "text"): - return contents.text + messages.extend(tool_messages) + return messages + + # Handle Part object directly + part_result = _extract_part_content(contents) + if part_result: + if isinstance(part_result, dict) and part_result.get("role") == "tool": + return [part_result] + else: + return [{"role": "user", "content": [part_result]}] + + # Handle PIL.Image.Image + try: + from PIL import Image as PILImage + + if isinstance(contents, PILImage.Image): + blob_part = _extract_pil_image(contents) + if blob_part: + return [{"role": "user", "content": [blob_part]}] + except ImportError: + pass + + # Handle File object + if hasattr(contents, "uri") and hasattr(contents, "mime_type"): + # File object + file_uri = getattr(contents, "uri", None) + mime_type = getattr(contents, "mime_type", None) + if file_uri and mime_type: + blob_part = { + "type": "blob", + "mime_type": mime_type, + "file_uri": file_uri, + } + return [{"role": "user", "content": [blob_part]}] + + # Handle direct text attribute + if hasattr(contents, "text") and contents.text: + return [ + {"role": "user", "content": [{"text": str(contents.text), "type": "text"}]} + ] + + return [] + + +def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": + """Extract content from a Part object or dict. + + Returns: + - dict for content part (text/blob) or tool message + - None if part should be skipped + """ + if part is None: + return None + + # Handle dict Part + if isinstance(part, dict): + # Check for function_response first (tool message) + if "function_response" in part: + return _extract_tool_message_from_part(part) + + if "text" in part: + return {"text": part["text"], "type": "text"} + + if "file_data" in part: + file_data = part["file_data"] + if isinstance(file_data, dict): + return { + "type": "blob", + "mime_type": file_data.get("mime_type"), + "file_uri": file_data.get("file_uri"), + } + + if "inline_data" in part: + inline_data = part["inline_data"] + if isinstance(inline_data, dict): + data = inline_data.get("data") + mime_type = inline_data.get("mime_type") + if data and mime_type: + # Encode bytes to base64 + if isinstance(data, bytes): + data_b64 = base64.b64encode(data).decode("utf-8") + return { + "type": "blob", + "mime_type": mime_type, + "content": f"data:{mime_type};base64,{data_b64}", + } + + return None + + # Handle Part object + # Check for function_response (tool message) + if hasattr(part, "function_response") and part.function_response: + return _extract_tool_message_from_part(part) + + # Handle text + if hasattr(part, "text") and part.text: + return {"text": part.text, "type": "text"} + + # Handle file_data + if hasattr(part, "file_data") and part.file_data: + file_data = part.file_data + file_uri = getattr(file_data, "file_uri", None) + mime_type = getattr(file_data, "mime_type", None) + if file_uri and mime_type: + return { + "type": "blob", + "mime_type": mime_type, + "file_uri": file_uri, + } + + # Handle inline_data + if hasattr(part, "inline_data") and part.inline_data: + inline_data = part.inline_data + data = getattr(inline_data, "data", None) + mime_type = getattr(inline_data, "mime_type", None) + if data and mime_type: + # Encode bytes to base64 + if isinstance(data, bytes): + data_b64 = base64.b64encode(data).decode("utf-8") + return { + "type": "blob", + "mime_type": mime_type, + "content": f"data:{mime_type};base64,{data_b64}", + } return None +def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": + """Extract tool message from a Part with function_response. + + Returns: + {"role": "tool", "content": {"toolCallId": "...", "toolName": "...", "output": "..."}} + or None if not a valid tool message + """ + function_response = None + + if isinstance(part, dict): + function_response = part.get("function_response") + elif hasattr(part, "function_response"): + function_response = part.function_response + + if not function_response: + return None + + # Extract fields from function_response + tool_call_id = None + tool_name = None + output = None + + if isinstance(function_response, dict): + tool_call_id = function_response.get("id") + tool_name = function_response.get("name") + response_dict = function_response.get("response", {}) + # Prefer "output" key if present, otherwise use entire response + output = response_dict.get("output", response_dict) + else: + # FunctionResponse object + tool_call_id = getattr(function_response, "id", None) + tool_name = getattr(function_response, "name", None) + response_obj = getattr(function_response, "response", None) + if response_obj: + if isinstance(response_obj, dict): + output = response_obj.get("output", response_obj) + else: + output = response_obj + + if not tool_name: + return None + + return { + "role": "tool", + "content": { + "toolCallId": str(tool_call_id) if tool_call_id else None, + "toolName": str(tool_name), + "output": safe_serialize(output) if output is not None else None, + }, + } + + +def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": + """Extract blob part from PIL.Image.Image.""" + try: + from PIL import Image as PILImage + import io + + if not isinstance(image, PILImage.Image): + return None + + # Get format, default to JPEG + format_str = image.format or "JPEG" + suffix = format_str.lower() + mime_type = f"image/{suffix}" + + # Convert to bytes + bytes_io = io.BytesIO() + image.save(bytes_io, format=format_str) + image_bytes = bytes_io.getvalue() + + # Encode to base64 + data_b64 = base64.b64encode(image_bytes).decode("utf-8") + + return { + "type": "blob", + "mime_type": mime_type, + "content": f"data:{mime_type};base64,{data_b64}", + } + except Exception: + return None + + +def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]": + """Extract text from contents parameter which can have various formats. + + This is a compatibility function that extracts text from messages. + For new code, use extract_contents_messages instead. + """ + messages = extract_contents_messages(contents) + if not messages: + return None + + texts = [] + for message in messages: + content = message.get("content") + if isinstance(content, str): + texts.append(content) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + texts.append(part.get("text", "")) + + return " ".join(texts) if texts else None + + def _format_tools_for_span( tools: "Iterable[Tool | Callable[..., Any]]", ) -> "Optional[List[dict[str, Any]]]": @@ -457,16 +739,32 @@ def set_span_data_for_request( if config and hasattr(config, "system_instruction"): system_instruction = config.system_instruction if system_instruction: - system_text = extract_contents_text(system_instruction) - if system_text: - messages.append({"role": "system", "content": system_text}) + system_messages = extract_contents_messages(system_instruction) + # System instruction should be a single system message + # Extract text from all messages and combine into one system message + system_texts = [] + for msg in system_messages: + content = msg.get("content") + if isinstance(content, list): + # Extract text from content parts + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + system_texts.append(part.get("text", "")) + elif isinstance(content, str): + system_texts.append(content) + + if system_texts: + messages.append( + {"role": "system", "content": " ".join(system_texts)} + ) - # Add user message - contents_text = extract_contents_text(contents) - if contents_text: - messages.append({"role": "user", "content": contents_text}) + # Extract messages from contents + contents_messages = extract_contents_messages(contents) + messages.extend(contents_messages) if messages: + # Redact blob message parts + messages = redact_blob_message_parts(messages) normalized_messages = normalize_message_roles(messages) scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages( diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index a49822f3d4..9a0ffa005a 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1,3 +1,4 @@ +import base64 import json import pytest from unittest import mock @@ -8,6 +9,7 @@ from sentry_sdk import start_transaction from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.google_genai import GoogleGenAIIntegration +from sentry_sdk.integrations.google_genai.utils import extract_contents_messages @pytest.fixture @@ -1417,3 +1419,324 @@ async def test_async_embed_content_span_origin( assert event["contexts"]["trace"]["origin"] == "manual" for span in event["spans"]: assert span["origin"] == "auto.ai.google_genai" + + +# Tests for extract_contents_messages function +def test_extract_contents_messages_none(): + """Test extract_contents_messages with None input""" + result = extract_contents_messages(None) + assert result == [] + + +def test_extract_contents_messages_string(): + """Test extract_contents_messages with string input""" + result = extract_contents_messages("Hello world") + assert result == [{"role": "user", "content": "Hello world"}] + + +def test_extract_contents_messages_content_object(): + """Test extract_contents_messages with Content object""" + content = genai_types.Content( + role="user", parts=[genai_types.Part(text="Test message")] + ) + result = extract_contents_messages(content) + assert len(result) == 1 + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "Test message", "type": "text"}] + + +def test_extract_contents_messages_content_object_model_role(): + """Test extract_contents_messages with Content object having model role""" + content = genai_types.Content( + role="model", parts=[genai_types.Part(text="Assistant response")] + ) + result = extract_contents_messages(content) + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert result[0]["content"] == [{"text": "Assistant response", "type": "text"}] + + +def test_extract_contents_messages_content_object_no_role(): + """Test extract_contents_messages with Content object without role""" + content = genai_types.Content(parts=[genai_types.Part(text="No role message")]) + result = extract_contents_messages(content) + assert len(result) == 1 + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "No role message", "type": "text"}] + + +def test_extract_contents_messages_part_object(): + """Test extract_contents_messages with Part object""" + part = genai_types.Part(text="Direct part") + result = extract_contents_messages(part) + assert len(result) == 1 + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "Direct part", "type": "text"}] + + +def test_extract_contents_messages_file_data(): + """Test extract_contents_messages with file_data""" + file_data = genai_types.FileData( + file_uri="gs://bucket/file.jpg", mime_type="image/jpeg" + ) + part = genai_types.Part(file_data=file_data) + content = genai_types.Content(parts=[part]) + result = extract_contents_messages(content) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) == 1 + blob_part = result[0]["content"][0] + assert blob_part["type"] == "blob" + assert blob_part["mime_type"] == "image/jpeg" + assert blob_part["file_uri"] == "gs://bucket/file.jpg" + + +def test_extract_contents_messages_inline_data(): + """Test extract_contents_messages with inline_data (binary)""" + # Create inline data with bytes + image_bytes = b"fake_image_data" + blob = genai_types.Blob(data=image_bytes, mime_type="image/png") + part = genai_types.Part(inline_data=blob) + content = genai_types.Content(parts=[part]) + result = extract_contents_messages(content) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) == 1 + blob_part = result[0]["content"][0] + assert blob_part["type"] == "blob" + assert blob_part["mime_type"] == "image/png" + assert "content" in blob_part + # Verify base64 encoding + expected_b64 = base64.b64encode(image_bytes).decode("utf-8") + assert blob_part["content"] == f"data:image/png;base64,{expected_b64}" + + +def test_extract_contents_messages_function_response(): + """Test extract_contents_messages with function_response (tool message)""" + function_response = genai_types.FunctionResponse( + id="call_123", name="get_weather", response={"output": "sunny"} + ) + part = genai_types.Part(function_response=function_response) + content = genai_types.Content(parts=[part]) + result = extract_contents_messages(content) + + assert len(result) == 1 + assert result[0]["role"] == "tool" + assert result[0]["content"]["toolCallId"] == "call_123" + assert result[0]["content"]["toolName"] == "get_weather" + assert result[0]["content"]["output"] == '"sunny"' + + +def test_extract_contents_messages_function_response_with_output_key(): + """Test extract_contents_messages with function_response that has output key""" + function_response = genai_types.FunctionResponse( + id="call_456", name="get_time", response={"output": "3:00 PM", "error": None} + ) + part = genai_types.Part(function_response=function_response) + content = genai_types.Content(parts=[part]) + result = extract_contents_messages(content) + + assert len(result) == 1 + assert result[0]["role"] == "tool" + assert result[0]["content"]["toolCallId"] == "call_456" + assert result[0]["content"]["toolName"] == "get_time" + # Should prefer "output" key + assert result[0]["content"]["output"] == '"3:00 PM"' + + +def test_extract_contents_messages_mixed_parts(): + """Test extract_contents_messages with mixed content parts""" + content = genai_types.Content( + role="user", + parts=[ + genai_types.Part(text="Text part"), + genai_types.Part( + file_data=genai_types.FileData( + file_uri="gs://bucket/image.jpg", mime_type="image/jpeg" + ) + ), + ], + ) + result = extract_contents_messages(content) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) == 2 + assert result[0]["content"][0] == {"text": "Text part", "type": "text"} + assert result[0]["content"][1]["type"] == "blob" + assert result[0]["content"][1]["file_uri"] == "gs://bucket/image.jpg" + + +def test_extract_contents_messages_list(): + """Test extract_contents_messages with list input""" + contents = [ + "First message", + genai_types.Content( + role="user", parts=[genai_types.Part(text="Second message")] + ), + ] + result = extract_contents_messages(contents) + + assert len(result) == 2 + assert result[0] == {"role": "user", "content": "First message"} + assert result[1]["role"] == "user" + assert result[1]["content"] == [{"text": "Second message", "type": "text"}] + + +def test_extract_contents_messages_dict_content(): + """Test extract_contents_messages with dict (ContentDict)""" + content_dict = {"role": "user", "parts": [{"text": "Dict message"}]} + result = extract_contents_messages(content_dict) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "Dict message", "type": "text"}] + + +def test_extract_contents_messages_dict_with_text(): + """Test extract_contents_messages with dict containing text key""" + content_dict = {"role": "user", "text": "Simple text"} + result = extract_contents_messages(content_dict) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "Simple text", "type": "text"}] + + +def test_extract_contents_messages_file_object(): + """Test extract_contents_messages with File object""" + file_obj = genai_types.File( + name="files/123", uri="gs://bucket/file.pdf", mime_type="application/pdf" + ) + result = extract_contents_messages(file_obj) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) == 1 + blob_part = result[0]["content"][0] + assert blob_part["type"] == "blob" + assert blob_part["mime_type"] == "application/pdf" + assert blob_part["file_uri"] == "gs://bucket/file.pdf" + + +@pytest.mark.skipif( + not hasattr(genai_types, "PIL_Image") or genai_types.PIL_Image is None, + reason="PIL not available", +) +def test_extract_contents_messages_pil_image(): + """Test extract_contents_messages with PIL.Image.Image""" + try: + from PIL import Image as PILImage + + # Create a simple test image + img = PILImage.new("RGB", (10, 10), color="red") + result = extract_contents_messages(img) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) == 1 + blob_part = result[0]["content"][0] + assert blob_part["type"] == "blob" + assert blob_part["mime_type"].startswith("image/") + assert "content" in blob_part + assert blob_part["content"].startswith("data:image/") + except ImportError: + pytest.skip("PIL not available") + + +def test_extract_contents_messages_tool_and_text(): + """Test extract_contents_messages with both tool message and text""" + content = genai_types.Content( + role="user", + parts=[ + genai_types.Part(text="User question"), + genai_types.Part( + function_response=genai_types.FunctionResponse( + id="call_789", name="search", response={"output": "results"} + ) + ), + ], + ) + result = extract_contents_messages(content) + + # Should have two messages: one user message and one tool message + assert len(result) == 2 + # First should be user message with text + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "User question", "type": "text"}] + # Second should be tool message + assert result[1]["role"] == "tool" + assert result[1]["content"]["toolCallId"] == "call_789" + assert result[1]["content"]["toolName"] == "search" + + +def test_extract_contents_messages_empty_parts(): + """Test extract_contents_messages with Content object with empty parts""" + content = genai_types.Content(role="user", parts=[]) + result = extract_contents_messages(content) + + assert result == [] + + +def test_extract_contents_messages_empty_list(): + """Test extract_contents_messages with empty list""" + result = extract_contents_messages([]) + assert result == [] + + +def test_extract_contents_messages_dict_inline_data(): + """Test extract_contents_messages with dict containing inline_data""" + content_dict = { + "role": "user", + "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], + } + result = extract_contents_messages(content_dict) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) == 1 + blob_part = result[0]["content"][0] + assert blob_part["type"] == "blob" + assert blob_part["mime_type"] == "image/gif" + expected_b64 = base64.b64encode(b"binary_data").decode("utf-8") + assert blob_part["content"] == f"data:image/gif;base64,{expected_b64}" + + +def test_extract_contents_messages_dict_function_response(): + """Test extract_contents_messages with dict containing function_response""" + content_dict = { + "role": "user", + "parts": [ + { + "function_response": { + "id": "dict_call_1", + "name": "dict_tool", + "response": {"result": "success"}, + } + } + ], + } + result = extract_contents_messages(content_dict) + + assert len(result) == 1 + assert result[0]["role"] == "tool" + assert result[0]["content"]["toolCallId"] == "dict_call_1" + assert result[0]["content"]["toolName"] == "dict_tool" + assert result[0]["content"]["output"] == '{"result": "success"}' + + +def test_extract_contents_messages_object_with_text_attribute(): + """Test extract_contents_messages with object that has text attribute""" + + class TextObject: + def __init__(self): + self.text = "Object text" + + obj = TextObject() + result = extract_contents_messages(obj) + + assert len(result) == 1 + assert result[0]["role"] == "user" + assert result[0]["content"] == [{"text": "Object text", "type": "text"}] From 4244319b862f2e109ad8556cb624debf8916e58f Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 8 Jan 2026 14:19:57 +0100 Subject: [PATCH 06/15] fix(integrations): address cursor review comments --- sentry_sdk/integrations/google_genai/utils.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index ac3870a888..f2d8467463 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -291,10 +291,10 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": if "function_response" in part: return _extract_tool_message_from_part(part) - if "text" in part: + if part.get("text"): return {"text": part["text"], "type": "text"} - if "file_data" in part: + if part.get("file_data"): file_data = part["file_data"] if isinstance(file_data, dict): return { @@ -303,7 +303,7 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": "file_uri": file_data.get("file_uri"), } - if "inline_data" in part: + if part.get("inline_data"): inline_data = part["inline_data"] if isinstance(inline_data, dict): data = inline_data.get("data") @@ -384,7 +384,7 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": if isinstance(function_response, dict): tool_call_id = function_response.get("id") tool_name = function_response.get("name") - response_dict = function_response.get("response", {}) + response_dict = function_response.get("response") or {} # Prefer "output" key if present, otherwise use entire response output = response_dict.get("output", response_dict) else: @@ -763,8 +763,6 @@ def set_span_data_for_request( messages.extend(contents_messages) if messages: - # Redact blob message parts - messages = redact_blob_message_parts(messages) normalized_messages = normalize_message_roles(messages) scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages( From f72aa457298a1ec351dd69c7f126ff63c54f3029 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 8 Jan 2026 15:21:55 +0100 Subject: [PATCH 07/15] fix(integrations): ensure file_data returns valid blob structure only if mime_type and file_uri are present (Cursor comment) --- sentry_sdk/integrations/google_genai/utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index f2d8467463..f36288dd3c 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -297,11 +297,14 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": if part.get("file_data"): file_data = part["file_data"] if isinstance(file_data, dict): - return { - "type": "blob", - "mime_type": file_data.get("mime_type"), - "file_uri": file_data.get("file_uri"), - } + mime_type = file_data.get("mime_type") + file_uri = file_data.get("file_uri") + if mime_type and file_uri: + return { + "type": "blob", + "mime_type": mime_type, + "file_uri": file_uri, + } if part.get("inline_data"): inline_data = part["inline_data"] From 2be041984f3623ba900582f846d74d50bb0aad54 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 8 Jan 2026 15:26:15 +0100 Subject: [PATCH 08/15] fix(integrations): add type ignore for missing PIL.Image import --- sentry_sdk/integrations/google_genai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index f36288dd3c..05d2b4bdba 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -244,7 +244,7 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A # Handle PIL.Image.Image try: - from PIL import Image as PILImage + from PIL import Image as PILImage # type: ignore[import-not-found] if isinstance(contents, PILImage.Image): blob_part = _extract_pil_image(contents) @@ -417,7 +417,7 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": """Extract blob part from PIL.Image.Image.""" try: - from PIL import Image as PILImage + from PIL import Image as PILImage # type: ignore[import-not-found] import io if not isinstance(image, PILImage.Image): From 86f6ecb7c757f9937404c71fa4003ead6c8a2d91 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 13 Jan 2026 13:49:07 +0100 Subject: [PATCH 09/15] fix: linting issue and review comment --- sentry_sdk/integrations/google_genai/utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 05d2b4bdba..1fabe3b420 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -394,12 +394,11 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": # FunctionResponse object tool_call_id = getattr(function_response, "id", None) tool_name = getattr(function_response, "name", None) - response_obj = getattr(function_response, "response", None) - if response_obj: - if isinstance(response_obj, dict): - output = response_obj.get("output", response_obj) - else: - output = response_obj + response_obj = getattr(function_response, "response", None) or {} + if isinstance(response_obj, dict): + output = response_obj.get("output", response_obj) + else: + output = response_obj if not tool_name: return None @@ -417,7 +416,7 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": """Extract blob part from PIL.Image.Image.""" try: - from PIL import Image as PILImage # type: ignore[import-not-found] + from PIL import Image as PILImage import io if not isinstance(image, PILImage.Image): From 0355c632d3e61d6be5c7c86cbc92e1e5fd767791 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 14 Jan 2026 15:42:43 +0100 Subject: [PATCH 10/15] fix(integrations): google-genai do not encode binary data that gets redacted later --- sentry_sdk/integrations/google_genai/utils.py | 21 ++++--------------- .../google_genai/test_google_genai.py | 10 +++------ 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 1fabe3b420..6c245b91f7 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -1,8 +1,8 @@ -import base64 import copy import inspect from functools import wraps from .consts import ORIGIN, TOOL_ATTRIBUTES_MAP, GEN_AI_SYSTEM +from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from typing import ( cast, TYPE_CHECKING, @@ -312,13 +312,11 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": data = inline_data.get("data") mime_type = inline_data.get("mime_type") if data and mime_type: - # Encode bytes to base64 if isinstance(data, bytes): - data_b64 = base64.b64encode(data).decode("utf-8") return { "type": "blob", "mime_type": mime_type, - "content": f"data:{mime_type};base64,{data_b64}", + "content": BLOB_DATA_SUBSTITUTE, } return None @@ -350,13 +348,11 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": data = getattr(inline_data, "data", None) mime_type = getattr(inline_data, "mime_type", None) if data and mime_type: - # Encode bytes to base64 if isinstance(data, bytes): - data_b64 = base64.b64encode(data).decode("utf-8") return { "type": "blob", "mime_type": mime_type, - "content": f"data:{mime_type};base64,{data_b64}", + "content": BLOB_DATA_SUBSTITUTE, } return None @@ -417,7 +413,6 @@ def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": """Extract blob part from PIL.Image.Image.""" try: from PIL import Image as PILImage - import io if not isinstance(image, PILImage.Image): return None @@ -427,18 +422,10 @@ def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": suffix = format_str.lower() mime_type = f"image/{suffix}" - # Convert to bytes - bytes_io = io.BytesIO() - image.save(bytes_io, format=format_str) - image_bytes = bytes_io.getvalue() - - # Encode to base64 - data_b64 = base64.b64encode(image_bytes).decode("utf-8") - return { "type": "blob", "mime_type": mime_type, - "content": f"data:{mime_type};base64,{data_b64}", + "content": BLOB_DATA_SUBSTITUTE, } except Exception: return None diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 9a0ffa005a..c7d84da811 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1,4 +1,3 @@ -import base64 import json import pytest from unittest import mock @@ -7,6 +6,7 @@ from google.genai import types as genai_types from sentry_sdk import start_transaction +from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.google_genai import GoogleGenAIIntegration from sentry_sdk.integrations.google_genai.utils import extract_contents_messages @@ -1507,10 +1507,7 @@ def test_extract_contents_messages_inline_data(): blob_part = result[0]["content"][0] assert blob_part["type"] == "blob" assert blob_part["mime_type"] == "image/png" - assert "content" in blob_part - # Verify base64 encoding - expected_b64 = base64.b64encode(image_bytes).decode("utf-8") - assert blob_part["content"] == f"data:image/png;base64,{expected_b64}" + assert blob_part["content"] == BLOB_DATA_SUBSTITUTE def test_extract_contents_messages_function_response(): @@ -1700,8 +1697,7 @@ def test_extract_contents_messages_dict_inline_data(): blob_part = result[0]["content"][0] assert blob_part["type"] == "blob" assert blob_part["mime_type"] == "image/gif" - expected_b64 = base64.b64encode(b"binary_data").decode("utf-8") - assert blob_part["content"] == f"data:image/gif;base64,{expected_b64}" + assert blob_part["content"] == BLOB_DATA_SUBSTITUTE def test_extract_contents_messages_dict_function_response(): From 910c6791770751df664de13d4967eeedaf8f69e9 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 14 Jan 2026 16:43:05 +0100 Subject: [PATCH 11/15] fix(integrations): Use explicit None checks instead of `or {}` pattern --- sentry_sdk/integrations/google_genai/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 6c245b91f7..300aba0f7d 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -383,14 +383,16 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": if isinstance(function_response, dict): tool_call_id = function_response.get("id") tool_name = function_response.get("name") - response_dict = function_response.get("response") or {} + response_dict = function_response.get("response", {}) # Prefer "output" key if present, otherwise use entire response output = response_dict.get("output", response_dict) else: # FunctionResponse object tool_call_id = getattr(function_response, "id", None) tool_name = getattr(function_response, "name", None) - response_obj = getattr(function_response, "response", None) or {} + response_obj = getattr(function_response, "response", None) + if response_obj is None: + response_obj = {} if isinstance(response_obj, dict): output = response_obj.get("output", response_obj) else: From bd781654c11ef4f1892ad8891296da92e250bb60 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 15 Jan 2026 14:01:42 +0100 Subject: [PATCH 12/15] feat(ai): Add shared content transformation functions for multimodal AI messages Add transform_content_part() and transform_message_content() functions to standardize content part handling across all AI integrations. These functions transform various SDK-specific formats (OpenAI, Anthropic, Google, LangChain) into a unified format: - blob: base64-encoded binary data - uri: URL references (including file URIs) - file: file ID references Also adds get_modality_from_mime_type() helper to infer content modality (image/audio/video/document) from MIME types. --- sentry_sdk/ai/utils.py | 237 ++++++++++++++++++ tests/test_ai_monitoring.py | 484 ++++++++++++++++++++++++++++++++++++ 2 files changed, 721 insertions(+) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 71f7544a1c..b7b3b790d2 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -72,6 +72,243 @@ def parse_data_uri(url: str) -> "Tuple[str, str]": return mime_type, content +def get_modality_from_mime_type(mime_type: str) -> str: + """ + Infer the content modality from a MIME type string. + + Args: + mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3") + + Returns: + One of: "image", "audio", "video", or "document" + Defaults to "image" for unknown or empty MIME types. + + Examples: + "image/jpeg" -> "image" + "audio/mp3" -> "audio" + "video/mp4" -> "video" + "application/pdf" -> "document" + "text/plain" -> "document" + """ + if not mime_type: + return "image" # Default fallback + + mime_lower = mime_type.lower() + if mime_lower.startswith("image/"): + return "image" + elif mime_lower.startswith("audio/"): + return "audio" + elif mime_lower.startswith("video/"): + return "video" + elif mime_lower.startswith("application/") or mime_lower.startswith("text/"): + return "document" + else: + return "image" # Default fallback for unknown types + + +def transform_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a content part from various AI SDK formats to Sentry's standardized format. + + Supported input formats: + - OpenAI/LiteLLM: {"type": "image_url", "image_url": {"url": "..."}} + - Anthropic: {"type": "image|document", "source": {"type": "base64|url|file", ...}} + - Google: {"inline_data": {...}} or {"file_data": {...}} + - Generic: {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from an AI SDK + + Returns: + A transformed dictionary in standardized format, or None if the format + is unrecognized or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + # Handle OpenAI/LiteLLM image_url format + # {"type": "image_url", "image_url": {"url": "..."}} or {"type": "image_url", "image_url": "..."} + if block_type == "image_url": + image_url_data = content_part.get("image_url") + if isinstance(image_url_data, str): + url = image_url_data + elif isinstance(image_url_data, dict): + url = image_url_data.get("url", "") + else: + return None + + if not url: + return None + + # Check if it's a data URI (base64 encoded) + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + # Regular URL + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + # Handle Anthropic format with source dict + # {"type": "image|document", "source": {"type": "base64|url|file", "media_type": "...", "data|url|file_id": "..."}} + if block_type in ("image", "document") and "source" in content_part: + source = content_part.get("source") + if not isinstance(source, dict): + return None + + source_type = source.get("type") + media_type = source.get("media_type", "") + modality = ( + "document" + if block_type == "document" + else get_modality_from_mime_type(media_type) + ) + + if source_type == "base64": + return { + "type": "blob", + "modality": modality, + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": modality, + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": modality, + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + return None + + # Handle Google inline_data format + # {"inline_data": {"mime_type": "...", "data": "..."}} + if "inline_data" in content_part: + inline_data = content_part.get("inline_data") + if isinstance(inline_data, dict): + mime_type = inline_data.get("mime_type", "") + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": inline_data.get("data", ""), + } + return None + + # Handle Google file_data format + # {"file_data": {"mime_type": "...", "file_uri": "..."}} + if "file_data" in content_part: + file_data = content_part.get("file_data") + if isinstance(file_data, dict): + mime_type = file_data.get("mime_type", "") + return { + "type": "uri", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "uri": file_data.get("file_uri", ""), + } + return None + + # Handle generic format with direct fields (LangChain style) + # {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."} + if block_type in ("image", "audio", "video", "file"): + mime_type = content_part.get("mime_type", "") + modality = block_type if block_type != "file" else "document" + + # Check for base64 encoded content + if "base64" in content_part: + return { + "type": "blob", + "modality": modality, + "mime_type": mime_type, + "content": content_part.get("base64", ""), + } + # Check for URL reference + elif "url" in content_part: + return { + "type": "uri", + "modality": modality, + "mime_type": mime_type, + "uri": content_part.get("url", ""), + } + # Check for file_id reference + elif "file_id" in content_part: + return { + "type": "file", + "modality": modality, + "mime_type": mime_type, + "file_id": content_part.get("file_id", ""), + } + + # Unrecognized format + return None + + +def transform_message_content(content: "Any") -> "Any": + """ + Transform message content, handling both string content and list of content blocks. + + For list content, each item is transformed using transform_content_part(). + Items that cannot be transformed (return None) are kept as-is. + + Args: + content: Message content - can be a string, list of content blocks, or other + + Returns: + - String content: returned as-is + - List content: list with each transformable item converted to standardized format + - Other: returned as-is + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + return transformed + + return content + + def _normalize_data(data: "Any", unpack: bool = True) -> "Any": # convert pydantic data (e.g. OpenAI v1+) to json compatible format if hasattr(data, "model_dump"): diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 1ff354f473..209d24e502 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -19,6 +19,9 @@ _find_truncation_index, parse_data_uri, redact_blob_message_parts, + get_modality_from_mime_type, + transform_content_part, + transform_message_content, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -842,3 +845,484 @@ def test_handles_uri_without_data_prefix(self): assert mime_type == "image/jpeg" assert content == "/9j/4AAQ" + + +class TestGetModalityFromMimeType: + def test_image_mime_types(self): + """Test that image MIME types return 'image' modality""" + assert get_modality_from_mime_type("image/jpeg") == "image" + assert get_modality_from_mime_type("image/png") == "image" + assert get_modality_from_mime_type("image/gif") == "image" + assert get_modality_from_mime_type("image/webp") == "image" + assert get_modality_from_mime_type("IMAGE/JPEG") == "image" # case insensitive + + def test_audio_mime_types(self): + """Test that audio MIME types return 'audio' modality""" + assert get_modality_from_mime_type("audio/mp3") == "audio" + assert get_modality_from_mime_type("audio/wav") == "audio" + assert get_modality_from_mime_type("audio/ogg") == "audio" + assert get_modality_from_mime_type("AUDIO/MP3") == "audio" # case insensitive + + def test_video_mime_types(self): + """Test that video MIME types return 'video' modality""" + assert get_modality_from_mime_type("video/mp4") == "video" + assert get_modality_from_mime_type("video/webm") == "video" + assert get_modality_from_mime_type("video/quicktime") == "video" + assert get_modality_from_mime_type("VIDEO/MP4") == "video" # case insensitive + + def test_document_mime_types(self): + """Test that application and text MIME types return 'document' modality""" + assert get_modality_from_mime_type("application/pdf") == "document" + assert get_modality_from_mime_type("application/json") == "document" + assert get_modality_from_mime_type("text/plain") == "document" + assert get_modality_from_mime_type("text/html") == "document" + + def test_empty_mime_type_returns_image(self): + """Test that empty MIME type defaults to 'image'""" + assert get_modality_from_mime_type("") == "image" + + def test_none_mime_type_returns_image(self): + """Test that None-like values default to 'image'""" + assert get_modality_from_mime_type(None) == "image" + + def test_unknown_mime_type_returns_image(self): + """Test that unknown MIME types default to 'image'""" + assert get_modality_from_mime_type("unknown/type") == "image" + assert get_modality_from_mime_type("custom/format") == "image" + + +class TestTransformContentPart: + # OpenAI/LiteLLM format tests + def test_openai_image_url_with_data_uri(self): + """Test transforming OpenAI image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": ""}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_openai_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + # Anthropic format tests + def test_anthropic_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_anthropic_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_anthropic_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_anthropic_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_anthropic_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + # Google format tests + def test_google_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_google_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_google_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + # Generic format tests (LangChain style) + def test_generic_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_generic_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_generic_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_generic_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + # Edge cases and error handling + def test_text_block_returns_none(self): + """Test that text blocks return None (not transformed)""" + content_part = {"type": "text", "text": "Hello world"} + result = transform_content_part(content_part) + + assert result is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_content_part("string") is None + assert transform_content_part(123) is None + assert transform_content_part(None) is None + assert transform_content_part([1, 2, 3]) is None + + def test_empty_dict_returns_none(self): + """Test that empty dict returns None""" + assert transform_content_part({}) is None + + def test_unknown_type_returns_none(self): + """Test that unknown type returns None""" + content_part = {"type": "unknown", "data": "something"} + assert transform_content_part(content_part) is None + + def test_openai_image_url_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_content_part(content_part) is None + + def test_anthropic_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_anthropic_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_content_part(content_part) is None + + def test_google_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_google_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + +class TestTransformMessageContent: + def test_string_content_returned_as_is(self): + """Test that string content is returned unchanged""" + content = "Hello, world!" + result = transform_message_content(content) + + assert result == "Hello, world!" + + def test_list_with_transformable_items(self): + """Test transforming a list with transformable content parts""" + content = [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": {"url": ""}, + }, + ] + result = transform_message_content(content) + + assert len(result) == 2 + # Text block should be unchanged (transform returns None, so original kept) + assert result[0] == {"type": "text", "text": "What's in this image?"} + # Image should be transformed + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQ", + } + + def test_list_with_non_dict_items(self): + """Test that non-dict items in list are kept as-is""" + content = ["text string", 123, {"type": "text", "text": "hi"}] + result = transform_message_content(content) + + assert result == ["text string", 123, {"type": "text", "text": "hi"}] + + def test_tuple_content(self): + """Test that tuple content is also handled""" + content = ( + {"type": "text", "text": "Hello"}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/img.jpg"}, + }, + ) + result = transform_message_content(content) + + assert len(result) == 2 + assert result[0] == {"type": "text", "text": "Hello"} + assert result[1] == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/img.jpg", + } + + def test_other_types_returned_as_is(self): + """Test that other types are returned unchanged""" + assert transform_message_content(123) == 123 + assert transform_message_content(None) is None + assert transform_message_content({"key": "value"}) == {"key": "value"} + + def test_mixed_content_types(self): + """Test transforming mixed content with multiple formats""" + content = [ + {"type": "text", "text": "Look at these:"}, + { + "type": "image_url", + "image_url": {"url": ""}, + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQ", + }, + }, + {"inline_data": {"mime_type": "audio/wav", "data": "UklGRiQA"}}, + ] + result = transform_message_content(content) + + assert len(result) == 4 + assert result[0] == {"type": "text", "text": "Look at these:"} + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0", + } + assert result[2] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQ", + } + assert result[3] == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_empty_list(self): + """Test that empty list is returned as empty list""" + assert transform_message_content([]) == [] From fc6bbfe93242f056380d074b160fc5ebcd853bf6 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 15 Jan 2026 14:12:43 +0100 Subject: [PATCH 13/15] refactor(google-genai): Use shared transform_content_part for dict formats Replace inline_data and file_data dict handling with the shared transform_content_part function. Keep Google SDK object handling and PIL.Image support local since those are Google-specific. --- sentry_sdk/integrations/google_genai/utils.py | 37 +++++++------------ .../google_genai/test_google_genai.py | 3 +- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 300aba0f7d..746b3f2282 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -22,6 +22,8 @@ truncate_and_annotate_messages, normalize_message_roles, redact_blob_message_parts, + transform_content_part, + get_modality_from_mime_type, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii @@ -294,30 +296,17 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": if part.get("text"): return {"text": part["text"], "type": "text"} - if part.get("file_data"): - file_data = part["file_data"] - if isinstance(file_data, dict): - mime_type = file_data.get("mime_type") - file_uri = file_data.get("file_uri") - if mime_type and file_uri: - return { - "type": "blob", - "mime_type": mime_type, - "file_uri": file_uri, - } - - if part.get("inline_data"): - inline_data = part["inline_data"] - if isinstance(inline_data, dict): - data = inline_data.get("data") - mime_type = inline_data.get("mime_type") - if data and mime_type: - if isinstance(data, bytes): - return { - "type": "blob", - "mime_type": mime_type, - "content": BLOB_DATA_SUBSTITUTE, - } + # Try using shared transform_content_part for Google dict formats (inline_data, file_data) + result = transform_content_part(part) + if result is not None: + # For inline_data with bytes data, substitute the content + if "inline_data" in part: + inline_data = part["inline_data"] + if isinstance(inline_data, dict) and isinstance( + inline_data.get("data"), bytes + ): + result["content"] = BLOB_DATA_SUBSTITUTE + return result return None diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index c7d84da811..2557c1c2e5 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -1638,7 +1638,8 @@ def test_extract_contents_messages_pil_image(): assert blob_part["type"] == "blob" assert blob_part["mime_type"].startswith("image/") assert "content" in blob_part - assert blob_part["content"].startswith(""}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + def test_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_openai_content_part(content_part) is None + + def test_non_image_url_type_returns_none(self): + """Test that non-image_url types return None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_openai_content_part(content_part) is None + + def test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_openai_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_openai_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_openai_content_part("string") is None + assert transform_openai_content_part(123) is None + assert transform_openai_content_part(None) is None + + +class TestTransformAnthropicContentPart: + """Tests for the Anthropic-specific transform function.""" + + def test_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + def test_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_anthropic_content_part(content_part) is None + + def test_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_missing_source_returns_none(self): + """Test that Anthropic format without source returns None""" + content_part = {"type": "image", "data": "something"} + assert transform_anthropic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_anthropic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_anthropic_content_part("string") is None + assert transform_anthropic_content_part(123) is None + assert transform_anthropic_content_part(None) is None + + +class TestTransformGoogleContentPart: + """Tests for the Google GenAI-specific transform function.""" + + def test_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_google_content_part(content_part) is None + + def test_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_google_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_google_content_part(content_part) is None + + def test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_google_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_google_content_part("string") is None + assert transform_google_content_part(123) is None + assert transform_google_content_part(None) is None + + +class TestTransformGenericContentPart: + """Tests for the generic/LangChain-style transform function.""" + + def test_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + def test_image_with_source_returns_none(self): + """Test that image with source key (Anthropic style) returns None""" + # This is Anthropic format, should NOT be handled by generic + content_part = { + "type": "image", + "source": {"type": "base64", "data": "abc"}, + } + assert transform_generic_content_part(content_part) is None + + def test_text_type_returns_none(self): + """Test that text type returns None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_generic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_generic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_generic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_generic_content_part("string") is None + assert transform_generic_content_part(123) is None + assert transform_generic_content_part(None) is None + + def test_missing_data_key_returns_none(self): + """Test that missing data key (base64/url/file_id) returns None""" + content_part = {"type": "image", "mime_type": "image/jpeg"} + assert transform_generic_content_part(content_part) is None + + class TestTransformContentPart: # OpenAI/LiteLLM format tests def test_openai_image_url_with_data_uri(self): From b9b629e68470868d84de1065976435d41ff75797 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 15 Jan 2026 15:55:37 +0100 Subject: [PATCH 15/15] refactor(google-genai): use transform_google_content_part directly Replace generic transform_content_part with the Google-specific transform_google_content_part function for better performance and clarity since we know Google GenAI uses inline_data and file_data formats. --- sentry_sdk/integrations/google_genai/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 746b3f2282..22b7a9f8ab 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -22,7 +22,7 @@ truncate_and_annotate_messages, normalize_message_roles, redact_blob_message_parts, - transform_content_part, + transform_google_content_part, get_modality_from_mime_type, ) from sentry_sdk.consts import OP, SPANDATA @@ -296,8 +296,8 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": if part.get("text"): return {"text": part["text"], "type": "text"} - # Try using shared transform_content_part for Google dict formats (inline_data, file_data) - result = transform_content_part(part) + # Try using Google-specific transform for dict formats (inline_data, file_data) + result = transform_google_content_part(part) if result is not None: # For inline_data with bytes data, substitute the content if "inline_data" in part: