diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 71f7544a1c..e9966b48b6 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -72,6 +72,397 @@ def parse_data_uri(url: str) -> "Tuple[str, str]": return mime_type, content +def get_modality_from_mime_type(mime_type: str) -> str: + """ + Infer the content modality from a MIME type string. + + Args: + mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3") + + Returns: + One of: "image", "audio", "video", or "document" + Defaults to "image" for unknown or empty MIME types. + + Examples: + "image/jpeg" -> "image" + "audio/mp3" -> "audio" + "video/mp4" -> "video" + "application/pdf" -> "document" + "text/plain" -> "document" + """ + if not mime_type: + return "image" # Default fallback + + mime_lower = mime_type.lower() + if mime_lower.startswith("image/"): + return "image" + elif mime_lower.startswith("audio/"): + return "audio" + elif mime_lower.startswith("video/"): + return "video" + elif mime_lower.startswith("application/") or mime_lower.startswith("text/"): + return "document" + else: + return "image" # Default fallback for unknown types + + +def transform_openai_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform an OpenAI/LiteLLM content part to Sentry's standardized format. + + This handles the OpenAI image_url format used by OpenAI and LiteLLM SDKs. + + Input format: + - {"type": "image_url", "image_url": {"url": "..."}} + - {"type": "image_url", "image_url": "..."} (string shorthand) + + Output format (one of): + - {"type": "blob", "modality": "image", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "image", "mime_type": "", "uri": "..."} + + Args: + content_part: A dictionary representing a content part from OpenAI/LiteLLM + + Returns: + A transformed dictionary in standardized format, or None if the format + is not OpenAI image_url format or transformation fails. 
+ """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type != "image_url": + return None + + image_url_data = content_part.get("image_url") + if isinstance(image_url_data, str): + url = image_url_data + elif isinstance(image_url_data, dict): + url = image_url_data.get("url", "") + else: + return None + + if not url: + return None + + # Check if it's a data URI (base64 encoded) + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + # Regular URL + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + +def transform_anthropic_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform an Anthropic content part to Sentry's standardized format. + + This handles the Anthropic image and document formats with source dictionaries. 
+ + Input format: + - {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}} + - {"type": "image", "source": {"type": "url", "media_type": "...", "url": "..."}} + - {"type": "image", "source": {"type": "file", "media_type": "...", "file_id": "..."}} + - {"type": "document", "source": {...}} (same source formats) + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from Anthropic + + Returns: + A transformed dictionary in standardized format, or None if the format + is not Anthropic format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type not in ("image", "document") or "source" not in content_part: + return None + + source = content_part.get("source") + if not isinstance(source, dict): + return None + + source_type = source.get("type") + media_type = source.get("media_type", "") + modality = ( + "document" + if block_type == "document" + else get_modality_from_mime_type(media_type) + ) + + if source_type == "base64": + return { + "type": "blob", + "modality": modality, + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": modality, + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": modality, + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + + return None + + +def transform_google_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a Google GenAI content part to Sentry's standardized format. 
+ + This handles the Google GenAI inline_data and file_data formats. + + Input format: + - {"inline_data": {"mime_type": "...", "data": "..."}} + - {"file_data": {"mime_type": "...", "file_uri": "..."}} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + + Args: + content_part: A dictionary representing a content part from Google GenAI + + Returns: + A transformed dictionary in standardized format, or None if the format + is not Google format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + # Handle Google inline_data format + if "inline_data" in content_part: + inline_data = content_part.get("inline_data") + if isinstance(inline_data, dict): + mime_type = inline_data.get("mime_type", "") + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": inline_data.get("data", ""), + } + return None + + # Handle Google file_data format + if "file_data" in content_part: + file_data = content_part.get("file_data") + if isinstance(file_data, dict): + mime_type = file_data.get("mime_type", "") + return { + "type": "uri", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "uri": file_data.get("file_uri", ""), + } + return None + + return None + + +def transform_generic_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a generic/LangChain-style content part to Sentry's standardized format. + + This handles generic formats where the type indicates the modality and + the data is provided via direct base64, url, or file_id fields. 
+ + Input format: + - {"type": "image", "base64": "...", "mime_type": "..."} + - {"type": "audio", "url": "...", "mime_type": "..."} + - {"type": "video", "base64": "...", "mime_type": "..."} + - {"type": "file", "file_id": "...", "mime_type": "..."} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part in generic format + + Returns: + A transformed dictionary in standardized format, or None if the format + is not generic format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type not in ("image", "audio", "video", "file"): + return None + + # Ensure it's not Anthropic format (which also uses type: "image") + if "source" in content_part: + return None + + mime_type = content_part.get("mime_type", "") + modality = block_type if block_type != "file" else "document" + + # Check for base64 encoded content + if "base64" in content_part: + return { + "type": "blob", + "modality": modality, + "mime_type": mime_type, + "content": content_part.get("base64", ""), + } + # Check for URL reference + elif "url" in content_part: + return { + "type": "uri", + "modality": modality, + "mime_type": mime_type, + "uri": content_part.get("url", ""), + } + # Check for file_id reference + elif "file_id" in content_part: + return { + "type": "file", + "modality": modality, + "mime_type": mime_type, + "file_id": content_part.get("file_id", ""), + } + + return None + + +def transform_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a content part from various AI SDK formats to Sentry's standardized format. 
+ + This is a heuristic dispatcher that detects the format and delegates to the + appropriate SDK-specific transformer. For direct SDK integration, prefer using + the specific transformers directly: + - transform_openai_content_part() for OpenAI/LiteLLM + - transform_anthropic_content_part() for Anthropic + - transform_google_content_part() for Google GenAI + - transform_generic_content_part() for LangChain and other generic formats + + Detection order: + 1. OpenAI: type == "image_url" + 2. Google: "inline_data" or "file_data" keys present + 3. Anthropic: type in ("image", "document") with "source" key + 4. Generic: type in ("image", "audio", "video", "file") with base64/url/file_id + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from an AI SDK + + Returns: + A transformed dictionary in standardized format, or None if the format + is unrecognized or transformation fails. 
+ """ + if not isinstance(content_part, dict): + return None + + # Try OpenAI format first (most common, clear indicator) + result = transform_openai_content_part(content_part) + if result is not None: + return result + + # Try Google format (unique keys make it easy to detect) + result = transform_google_content_part(content_part) + if result is not None: + return result + + # Try Anthropic format (has "source" key) + result = transform_anthropic_content_part(content_part) + if result is not None: + return result + + # Try generic format as fallback + result = transform_generic_content_part(content_part) + if result is not None: + return result + + # Unrecognized format + return None + + +def transform_message_content(content: "Any") -> "Any": + """ + Transform message content, handling both string content and list of content blocks. + + For list content, each item is transformed using transform_content_part(). + Items that cannot be transformed (return None) are kept as-is. + + Args: + content: Message content - can be a string, list of content blocks, or other + + Returns: + - String content: returned as-is + - List content: list with each transformable item converted to standardized format + - Other: returned as-is + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + return transformed + + return content + + def _normalize_data(data: "Any", unpack: bool = True) -> "Any": # convert pydantic data (e.g. 
OpenAI v1+) to json compatible format if hasattr(data, "model_dump"): @@ -107,6 +498,46 @@ def set_data_normalized( span.set_data(key, json.dumps(normalized)) +def extract_response_output( + output_items: "Any", +) -> "Tuple[List[Any], List[Dict[str, Any]]]": + """ + Extract response text and tool calls from OpenAI Responses API output. + + This handles the output format from OpenAI's Responses API where each output + item has a `type` field that can be "message" or "function_call". + + Args: + output_items: Iterable of output items from the response + + Returns: + Tuple of (response_texts, tool_calls) where: + - response_texts: List of text strings or dicts for unknown message types + - tool_calls: List of tool call dicts + """ + response_texts = [] # type: List[Any] + tool_calls = [] # type: List[Dict[str, Any]] + + for output in output_items: + if output.type == "function_call": + if hasattr(output, "model_dump"): + tool_calls.append(output.model_dump()) + elif hasattr(output, "dict"): + tool_calls.append(output.dict()) + elif output.type == "message": + for output_message in output.content: + try: + response_texts.append(output_message.text) + except AttributeError: + # Unknown output message type, just return the json + if hasattr(output_message, "model_dump"): + response_texts.append(output_message.model_dump()) + elif hasattr(output_message, "dict"): + response_texts.append(output_message.dict()) + + return response_texts, tool_calls + + def normalize_message_role(role: str) -> str: """ Normalize a message role to one of the 4 allowed gen_ai role values. 
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 66dc4a1c48..2b016be374 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -5,8 +5,10 @@ from sentry_sdk import consts from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import ( + extract_response_output, set_data_normalized, normalize_message_roles, + transform_openai_content_part, truncate_and_annotate_messages, ) from sentry_sdk.consts import SPANDATA @@ -203,6 +205,21 @@ def _set_input_data( and integration.include_prompts ): normalized_messages = normalize_message_roles(messages) + # Transform content parts to standardized format using OpenAI-specific transformer + for message in normalized_messages: + if isinstance(message, dict) and "content" in message: + content = message["content"] + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_openai_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + message["content"] = transformed + scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: @@ -265,14 +282,45 @@ def _set_output_data( if hasattr(response, "choices"): if should_send_default_pii() and integration.include_prompts: - response_text = [ - choice.message.model_dump() - for choice in response.choices - if choice.message is not None - ] + response_text = [] # type: list[str] + tool_calls = [] # type: list[Any] + + for choice in response.choices: + if choice.message is None: + continue + + # Extract text content + content = getattr(choice.message, "content", None) + if content is not None: + response_text.append(content) + + # Extract audio transcript if available + audio = getattr(choice.message, 
"audio", None) + if audio is not None: + transcript = getattr(audio, "transcript", None) + if transcript is not None: + response_text.append(transcript) + + # Extract tool calls + message_tool_calls = getattr(choice.message, "tool_calls", None) + if message_tool_calls is not None: + for tool_call in message_tool_calls: + if hasattr(tool_call, "model_dump"): + tool_calls.append(tool_call.model_dump()) + elif hasattr(tool_call, "dict"): + tool_calls.append(tool_call.dict()) + if len(response_text) > 0: set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_text) + if len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + _calculate_token_usage(messages, response, span, None, integration.count_tokens) if finish_span: @@ -280,34 +328,18 @@ def _set_output_data( elif hasattr(response, "output"): if should_send_default_pii() and integration.include_prompts: - output_messages: "dict[str, list[Any]]" = { - "response": [], - "tool": [], - } - - for output in response.output: - if output.type == "function_call": - output_messages["tool"].append(output.dict()) - elif output.type == "message": - for output_message in output.content: - try: - output_messages["response"].append(output_message.text) - except AttributeError: - # Unknown output message type, just return the json - output_messages["response"].append(output_message.dict()) - - if len(output_messages["tool"]) > 0: + response_texts, tool_calls = extract_response_output(response.output) + + if len(tool_calls) > 0: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - output_messages["tool"], + tool_calls, unpack=False, ) - if len(output_messages["response"]) > 0: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] - ) + if len(response_texts) > 0: + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_texts) _calculate_token_usage(messages, response, span, None, 
integration.count_tokens) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index c3a3a04dc9..1e2d7e758c 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -3,6 +3,7 @@ get_start_span_function, set_data_normalized, normalize_message_roles, + normalize_message_role, truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA @@ -10,7 +11,11 @@ from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN -from ..utils import _set_agent_data, _set_usage_data +from ..utils import ( + _set_agent_data, + _set_usage_data, + _transform_openai_agents_message_content, +) from typing import TYPE_CHECKING @@ -49,17 +54,40 @@ def invoke_agent_span( original_input = kwargs.get("original_input") if original_input is not None: - message = ( - original_input - if isinstance(original_input, str) - else safe_serialize(original_input) - ) - messages.append( - { - "content": [{"text": message, "type": "text"}], - "role": "user", - } - ) + if isinstance(original_input, str): + # String input: wrap in text block + messages.append( + { + "content": [{"text": original_input, "type": "text"}], + "role": "user", + } + ) + elif isinstance(original_input, list) and len(original_input) > 0: + # Check if list contains message objects (with type="message") + # or content parts (input_text, input_image, etc.) 
+ first_item = original_input[0] + if isinstance(first_item, dict) and first_item.get("type") == "message": + # List of message objects - process each individually + for msg in original_input: + if isinstance(msg, dict) and msg.get("type") == "message": + role = normalize_message_role(msg.get("role", "user")) + content = msg.get("content") + transformed = _transform_openai_agents_message_content( + content + ) + if isinstance(transformed, str): + transformed = [{"text": transformed, "type": "text"}] + elif not isinstance(transformed, list): + transformed = [ + {"text": str(transformed), "type": "text"} + ] + messages.append({"content": transformed, "role": role}) + else: + # List of content parts - transform and wrap as user message + content = _transform_openai_agents_message_content(original_input) + if not isinstance(content, list): + content = [{"text": str(content), "type": "text"}] + messages.append({"content": content, "role": "user"}) if len(messages) > 0: normalized_messages = normalize_message_roles(messages) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index a24d0e909d..afa16dc609 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -1,7 +1,9 @@ import sentry_sdk from sentry_sdk.ai.utils import ( GEN_AI_ALLOWED_MESSAGE_ROLES, + extract_response_output, normalize_message_roles, + parse_data_uri, set_data_normalized, normalize_message_role, truncate_and_annotate_messages, @@ -27,6 +29,133 @@ raise DidNotEnable("OpenAI Agents not installed") +def _transform_openai_agents_content_part( + content_part: "dict[str, Any]", +) -> "dict[str, Any]": + """ + Transform an OpenAI Agents content part to Sentry-compatible format. 
+ + Handles multimodal content (images, audio, files) by converting them + to the standardized format: + - base64 encoded data -> type: "blob" + - URL references -> type: "uri" + - file_id references -> type: "file" + """ + if not isinstance(content_part, dict): + return content_part + + part_type = content_part.get("type") + + # Handle input_text (OpenAI Agents SDK text format) -> normalize to standard text format + if part_type == "input_text": + return { + "type": "text", + "text": content_part.get("text", ""), + } + + # Handle image_url (OpenAI vision format) and input_image (OpenAI Agents SDK format) + if part_type in ("image_url", "input_image"): + # Get URL from either format + if part_type == "image_url": + image_url = content_part.get("image_url") or {} + url = ( + image_url.get("url", "") + if isinstance(image_url, dict) + else str(image_url) + ) + else: + # input_image format has image_url directly + url = content_part.get("image_url") or "" + + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": "image", + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + # Handle input_audio (OpenAI audio input format) + if part_type == "input_audio": + input_audio = content_part.get("input_audio") or {} + if isinstance(input_audio, dict): + audio_format = input_audio.get("format", "") + mime_type = f"audio/{audio_format}" if audio_format else "" + return { + "type": "blob", + "modality": "audio", + "mime_type": mime_type, + "content": input_audio.get("data", ""), + } + else: + return content_part + + # Handle image_file (Assistants API file-based images) + if part_type == "image_file": + image_file = content_part.get("image_file") or {} + if 
isinstance(image_file, dict): + return { + "type": "file", + "modality": "image", + "mime_type": "", + "file_id": image_file.get("file_id", ""), + } + else: + return content_part + + # Handle file (document attachments) + if part_type == "file": + file_data = content_part.get("file") or {} + if isinstance(file_data, dict): + return { + "type": "file", + "modality": "document", + "mime_type": "", + "file_id": file_data.get("file_id", ""), + } + else: + return content_part + + return content_part + + +def _transform_openai_agents_message_content(content: "Any") -> "Any": + """ + Transform OpenAI Agents message content, handling both string content and + list of content parts. + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + transformed.append(_transform_openai_agents_content_part(item)) + else: + transformed.append(item) + return transformed + + return content + + def _capture_exception(exc: "Any") -> None: set_span_errored() @@ -128,13 +257,15 @@ def _set_input_data( if "role" in message: normalized_role = normalize_message_role(message.get("role")) content = message.get("content") + # Transform content to handle multimodal data (images, audio, files) + transformed_content = _transform_openai_agents_message_content(content) request_messages.append( { "role": normalized_role, "content": ( - [{"type": "text", "text": content}] - if isinstance(content, str) - else content + [{"type": "text", "text": transformed_content}] + if isinstance(transformed_content, str) + else transformed_content ), } ) @@ -170,31 +301,13 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: if not should_send_default_pii(): return - output_messages: "dict[str, list[Any]]" = { - "response": [], - "tool": [], - } + response_texts, tool_calls = extract_response_output(result.output) - for output in result.output: - if output.type == "function_call": 
- output_messages["tool"].append(output.dict()) - elif output.type == "message": - for output_message in output.content: - try: - output_messages["response"].append(output_message.text) - except AttributeError: - # Unknown output message type, just return the json - output_messages["response"].append(output_message.dict()) - - if len(output_messages["tool"]) > 0: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"]) - ) + if len(tool_calls) > 0: + span.set_data(SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(tool_calls)) - if len(output_messages["response"]) > 0: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] - ) + if len(response_texts) > 0: + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_texts) def _create_mcp_execute_tool_spans( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..2c1e32b1e4 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -17,6 +17,10 @@ from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice +from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, + Function as ToolCallFunction, +) from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage SKIP_RESPONSES_TESTS = False @@ -44,7 +48,7 @@ OpenAIIntegration, _calculate_token_usage, ) -from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES, transform_message_content from sentry_sdk._types import AnnotatedValue from sentry_sdk.serializer import serialize @@ -1509,6 +1513,127 @@ def test_openai_message_role_mapping(sentry_init, capture_events): assert "ai" not in roles +def 
test_transform_message_content_image_url_to_blob(): + """Test that OpenAI image_url message parts are correctly converted to blob format""" + content = [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==", + "detail": "high", + }, + }, + ] + + converted = transform_message_content(content) + + assert isinstance(converted, list) + assert len(converted) == 2 + + # First item (text) should remain unchanged + assert converted[0] == { + "text": "How many ponies do you see in the image?", + "type": "text", + } + + # Second item (image_url) should be converted to blob format + blob_item = converted[1] + assert blob_item["type"] == "blob" + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/jpeg" + assert blob_item["content"] == "/9j/4AAQSkZJRg==" + # Verify the original image_url structure is replaced + assert "image_url" not in blob_item + + +def test_transform_message_content_image_url_to_uri(): + """Test that OpenAI image_url with non-data URLs are converted to uri format""" + content = [ + { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg", + "detail": "low", + }, + }, + ] + + converted = transform_message_content(content) + + assert len(converted) == 1 + uri_item = converted[0] + assert uri_item["type"] == "uri" + assert uri_item["uri"] == "https://example.com/image.jpg" + # Verify the original image_url structure is replaced + assert "image_url" not in uri_item + + +def test_transform_message_content_malformed_data_uri(): + """Test that malformed data URIs are handled gracefully without crashing""" + content = [ + { + "type": "image_url", + "image_url": { + # Malformed: missing ;base64, and comma separator + "url": "data:image/jpeg", + }, + }, + ] + + # Should not raise an exception + converted = transform_message_content(content) + + assert len(converted) == 1 + # Malformed data 
URI should fall back to uri type + item = converted[0] + assert item["type"] == "uri" + assert item["uri"] == "data:image/jpeg" + assert item["modality"] == "image" + + +def test_transform_message_content_image_url_as_string(): + """Test that image_url as a string (instead of dict) is handled gracefully""" + content = [ + { + "type": "image_url", + # Some implementations pass image_url as a string directly + "image_url": "https://example.com/image.jpg", + }, + ] + + # Should not raise an exception + converted = transform_message_content(content) + + assert len(converted) == 1 + item = converted[0] + assert item["type"] == "uri" + assert item["modality"] == "image" + assert item["uri"] == "https://example.com/image.jpg" + + +def test_transform_message_content_image_url_as_string_data_uri(): + """Test that image_url as a data URI string is correctly converted to blob""" + content = [ + { + "type": "image_url", + "image_url": "data:image/png;base64,iVBORw0KGgo=", + }, + ] + + converted = transform_message_content(content) + + assert len(converted) == 1 + item = converted[0] + assert item["type"] == "blob" + assert item["modality"] == "image" + assert item["mime_type"] == "image/png" + assert item["content"] == "iVBORw0KGgo=" + + def test_openai_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( @@ -1559,3 +1684,262 @@ def test_openai_message_truncation(sentry_init, capture_events): if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "len" in messages_meta.get("", {}) + + +def test_response_text_is_string_not_dict(sentry_init, capture_events): + """Test that gen_ai.response.text is a string, not a message dict. + + With set_data_normalized, a single-element list is unpacked to the element, + so ["the model response"] becomes just "the model response". 
+ """ + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + span = event["spans"][0] + + # Verify response text is in span data + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + + response_text = span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # For a single response, set_data_normalized unpacks the list, so it's the string directly + assert isinstance(response_text, str) + assert response_text == "the model response" + + # Make sure it's NOT a JSON string containing a dict (the old buggy format) + # The old format was like '{"content": "...", "role": "assistant", ...}' + try: + parsed = json.loads(response_text) + # If it parses as JSON, it should NOT be a dict + assert not isinstance(parsed, dict), "Response text should not be a dict" + except json.JSONDecodeError: + # If it's not valid JSON, that's fine - it's just the raw string + pass + + +def test_chat_completion_with_tool_calls(sentry_init, capture_events): + """Test that tool calls are properly extracted to gen_ai.response.tool_calls.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create a response with tool calls using proper OpenAI types + tool_call_response = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="tool_calls", + message=ChatCompletionMessage( + role="assistant", + content=None, # Content is None when there are tool calls + tool_calls=[ + ChatCompletionMessageToolCall( + id="call_123", + type="function", + function=ToolCallFunction( + 
name="get_weather", + arguments='{"location": "Paris"}', + ), + ), + ], + ), + ) + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), + ) + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=tool_call_response) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "What's the weather in Paris?"}], + ) + + (event,) = events + span = event["spans"][0] + + # Response text should NOT be present when content is None + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + # Tool calls should be extracted + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in span["data"] + tool_calls_data = span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + # Should be serialized as JSON + assert isinstance(tool_calls_data, str) + parsed_tool_calls = json.loads(tool_calls_data) + + assert isinstance(parsed_tool_calls, list) + assert len(parsed_tool_calls) == 1 + assert parsed_tool_calls[0]["id"] == "call_123" + assert parsed_tool_calls[0]["type"] == "function" + assert parsed_tool_calls[0]["function"]["name"] == "get_weather" + + +def test_chat_completion_with_content_and_tool_calls(sentry_init, capture_events): + """Test that both content and tool calls are captured when both are present.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create a response with both content and tool calls using proper OpenAI types + response_with_both = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="tool_calls", + message=ChatCompletionMessage( + role="assistant", + content="I'll check the weather for you.", + tool_calls=[ + ChatCompletionMessageToolCall( + id="call_456", + type="function", + 
function=ToolCallFunction( + name="get_weather", + arguments='{"location": "London"}', + ), + ), + ], + ), + ) + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=15, + prompt_tokens=25, + total_tokens=40, + ), + ) + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=response_with_both) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "What's the weather in London?"}], + ) + + (event,) = events + span = event["spans"][0] + + # Both should be present + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in span["data"] + + # Verify response text - single element list gets unpacked to the element + response_text = span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == "I'll check the weather for you." + + # Verify tool calls - single element list gets unpacked, then re-serialized as JSON + tool_calls_data = span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, str) + tool_calls = json.loads(tool_calls_data) + assert isinstance(tool_calls, list) + assert len(tool_calls) == 1 + assert tool_calls[0]["function"]["name"] == "get_weather" + + +def test_chat_completion_multiple_choices(sentry_init, capture_events): + """Test that multiple choices are all captured in the response text.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create a response with multiple choices + multi_choice_response = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="Response option 1" + ), + ), + Choice( + index=1, + finish_reason="stop", + message=ChatCompletionMessage( + 
role="assistant", content="Response option 2" + ), + ), + Choice( + index=2, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="Response option 3" + ), + ), + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=30, + prompt_tokens=20, + total_tokens=50, + ), + ) + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=multi_choice_response) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "Give me options"}], + n=3, + ) + + (event,) = events + span = event["spans"][0] + + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + response_text = json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]) + + # Should have all 3 responses as strings + assert len(response_text) == 3 + assert response_text[0] == "Response option 1" + assert response_text[1] == "Response option 2" + assert response_text[2] == "Response option 3" + + # All should be strings + for item in response_text: + assert isinstance(item, str) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9d463f8de5..74a800fb55 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -9,7 +9,12 @@ from sentry_sdk import start_span from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration -from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize +from sentry_sdk.integrations.openai_agents.utils import ( + _set_input_data, + safe_serialize, + _transform_openai_agents_content_part, + _transform_openai_agents_message_content, +) from sentry_sdk.utils import parse_version from openai import AsyncOpenAI @@ -2123,3 +2128,162 @@ def 
test_openai_agents_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 2 assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) + + +def test_transform_does_not_modify_original(): + """Test that transformation does not modify the original content.""" + import copy + + content_part = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD", + "detail": "high", + }, + } + original = copy.deepcopy(content_part) + _transform_openai_agents_content_part(content_part) + assert content_part == original, "Original content_part should not be modified" + + content = [ + {"type": "text", "text": "What is in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + }, + }, + ] + original_content = copy.deepcopy(content) + _transform_openai_agents_message_content(content) + assert content == original_content, "Original content list should not be modified" + + +def test_transform_handles_none_values(): + """Test that transformation handles None values gracefully without crashing.""" + # input_image with image_url explicitly set to None - should not crash + content_part = {"type": "input_image", "image_url": None} + result = _transform_openai_agents_content_part(content_part) + assert result == {"type": "uri", "modality": "image", "mime_type": "", "uri": ""} + + # image_url with nested dict set to None - should not crash + content_part = {"type": "image_url", "image_url": None} + result = _transform_openai_agents_content_part(content_part) + assert result == {"type": "uri", "modality": "image", "mime_type": "", "uri": ""} + + # input_audio with None value - gracefully returns empty blob + content_part = {"type": "input_audio", "input_audio": None} + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "", + 
"content": "", + } + + # image_file with None value - gracefully returns empty file reference + content_part = {"type": "image_file", "image_file": None} + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "file", + "modality": "image", + "mime_type": "", + "file_id": "", + } + + # file with None value - gracefully returns empty file reference + content_part = {"type": "file", "file": None} + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "file", + "modality": "document", + "mime_type": "", + "file_id": "", + } + + +def test_transform_image_url_to_blob(): + """Test that OpenAI image_url with data URI is converted to blob format.""" + content_part = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD", + "detail": "high", + }, + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRgABAQAAAQABAAD", + } + + +def test_transform_image_url_to_uri(): + """Test that OpenAI image_url with HTTP URL is converted to uri format.""" + content_part = { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg", + "detail": "low", + }, + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + +def test_transform_message_content_with_image(): + """Test that message content with image is properly transformed.""" + content = [ + {"type": "text", "text": "What is in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + }, + }, + ] + result = _transform_openai_agents_message_content(content) + assert len(result) == 2 + assert result[0] == {"type": "text", "text": "What is in this image?"} + assert 
result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgoAAAANSUhEUg==", + } + + +def test_transform_input_image_to_blob(): + """Test that OpenAI Agents SDK input_image format is converted to blob format.""" + # OpenAI Agents SDK uses input_image type with image_url as a direct string + content_part = { + "type": "input_image", + "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgoAAAANSUhEUg==", + } + + +def test_transform_input_text_to_text(): + """Test that OpenAI Agents SDK input_text format is normalized to text format.""" + content_part = { + "type": "input_text", + "text": "Hello, world!", + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "text", + "text": "Hello, world!", + } diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 1ff354f473..f6852d54bb 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -19,6 +19,13 @@ _find_truncation_index, parse_data_uri, redact_blob_message_parts, + get_modality_from_mime_type, + transform_openai_content_part, + transform_anthropic_content_part, + transform_google_content_part, + transform_generic_content_part, + transform_content_part, + transform_message_content, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -842,3 +849,906 @@ def test_handles_uri_without_data_prefix(self): assert mime_type == "image/jpeg" assert content == "/9j/4AAQ" + + +class TestGetModalityFromMimeType: + def test_image_mime_types(self): + """Test that image MIME types return 'image' modality""" + assert get_modality_from_mime_type("image/jpeg") == "image" + assert get_modality_from_mime_type("image/png") == "image" + assert get_modality_from_mime_type("image/gif") 
== "image" + assert get_modality_from_mime_type("image/webp") == "image" + assert get_modality_from_mime_type("IMAGE/JPEG") == "image" # case insensitive + + def test_audio_mime_types(self): + """Test that audio MIME types return 'audio' modality""" + assert get_modality_from_mime_type("audio/mp3") == "audio" + assert get_modality_from_mime_type("audio/wav") == "audio" + assert get_modality_from_mime_type("audio/ogg") == "audio" + assert get_modality_from_mime_type("AUDIO/MP3") == "audio" # case insensitive + + def test_video_mime_types(self): + """Test that video MIME types return 'video' modality""" + assert get_modality_from_mime_type("video/mp4") == "video" + assert get_modality_from_mime_type("video/webm") == "video" + assert get_modality_from_mime_type("video/quicktime") == "video" + assert get_modality_from_mime_type("VIDEO/MP4") == "video" # case insensitive + + def test_document_mime_types(self): + """Test that application and text MIME types return 'document' modality""" + assert get_modality_from_mime_type("application/pdf") == "document" + assert get_modality_from_mime_type("application/json") == "document" + assert get_modality_from_mime_type("text/plain") == "document" + assert get_modality_from_mime_type("text/html") == "document" + + def test_empty_mime_type_returns_image(self): + """Test that empty MIME type defaults to 'image'""" + assert get_modality_from_mime_type("") == "image" + + def test_none_mime_type_returns_image(self): + """Test that None-like values default to 'image'""" + assert get_modality_from_mime_type(None) == "image" + + def test_unknown_mime_type_returns_image(self): + """Test that unknown MIME types default to 'image'""" + assert get_modality_from_mime_type("unknown/type") == "image" + assert get_modality_from_mime_type("custom/format") == "image" + + +class TestTransformOpenAIContentPart: + """Tests for the OpenAI-specific transform function.""" + + def test_image_url_with_data_uri(self): + """Test transforming OpenAI 
image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + def test_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_openai_content_part(content_part) is None + + def test_non_image_url_type_returns_none(self): + """Test that non-image_url types return None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_openai_content_part(content_part) is None + + def 
test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_openai_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_openai_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_openai_content_part("string") is None + assert transform_openai_content_part(123) is None + assert transform_openai_content_part(None) is None + + +class TestTransformAnthropicContentPart: + """Tests for the Anthropic-specific transform function.""" + + def test_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = 
transform_anthropic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + def test_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_anthropic_content_part(content_part) is None + + def test_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_missing_source_returns_none(self): + """Test that Anthropic format without source returns None""" + content_part = {"type": "image", "data": "something"} + assert transform_anthropic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": 
"https://example.com"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_anthropic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_anthropic_content_part("string") is None + assert transform_anthropic_content_part(123) is None + assert transform_anthropic_content_part(None) is None + + +class TestTransformGoogleContentPart: + """Tests for the Google GenAI-specific transform function.""" + + def test_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert 
transform_google_content_part(content_part) is None + + def test_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_google_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_google_content_part(content_part) is None + + def test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_google_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_google_content_part("string") is None + assert transform_google_content_part(123) is None + assert transform_google_content_part(None) is None + + +class TestTransformGenericContentPart: + """Tests for the generic/LangChain-style transform function.""" + + def test_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_file_with_file_id(self): + """Test 
transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + def test_image_with_source_returns_none(self): + """Test that image with source key (Anthropic style) returns None""" + # This is Anthropic format, should NOT be handled by generic + content_part = { + "type": "image", + "source": {"type": "base64", "data": "abc"}, + } + assert transform_generic_content_part(content_part) is None + + def test_text_type_returns_none(self): + """Test that text type returns None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_generic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_generic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_generic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_generic_content_part("string") is None + assert transform_generic_content_part(123) is None + assert transform_generic_content_part(None) is None + + def 
test_missing_data_key_returns_none(self): + """Test that missing data key (base64/url/file_id) returns None""" + content_part = {"type": "image", "mime_type": "image/jpeg"} + assert transform_generic_content_part(content_part) is None + + +class TestTransformContentPart: + # OpenAI/LiteLLM format tests + def test_openai_image_url_with_data_uri(self): + """Test transforming OpenAI image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_openai_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + # Anthropic format tests + def test_anthropic_image_base64(self): + """Test 
transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_anthropic_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_anthropic_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_anthropic_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_anthropic_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": 
"document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + # Google format tests + def test_google_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_google_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_google_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + # Generic format tests (LangChain style) + def test_generic_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_generic_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": 
"https://example.com/audio.mp3", + } + + def test_generic_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_generic_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + # Edge cases and error handling + def test_text_block_returns_none(self): + """Test that text blocks return None (not transformed)""" + content_part = {"type": "text", "text": "Hello world"} + result = transform_content_part(content_part) + + assert result is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_content_part("string") is None + assert transform_content_part(123) is None + assert transform_content_part(None) is None + assert transform_content_part([1, 2, 3]) is None + + def test_empty_dict_returns_none(self): + """Test that empty dict returns None""" + assert transform_content_part({}) is None + + def test_unknown_type_returns_none(self): + """Test that unknown type returns None""" + content_part = {"type": "unknown", "data": "something"} + assert transform_content_part(content_part) is None + + def test_openai_image_url_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_content_part(content_part) is None + + def test_anthropic_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" 
+ content_part = {"type": "image", "source": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_anthropic_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_content_part(content_part) is None + + def test_google_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_google_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + +class TestTransformMessageContent: + def test_string_content_returned_as_is(self): + """Test that string content is returned unchanged""" + content = "Hello, world!" + result = transform_message_content(content) + + assert result == "Hello, world!" 
    def test_list_with_transformable_items(self):
        """Test transforming a list with transformable content parts"""
        content = [
            {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ"},
            },
        ]
        result = transform_message_content(content)

        assert len(result) == 2
        # Text block should be unchanged (transform returns None, so original kept)
        assert result[0] == {"type": "text", "text": "What's in this image?"}
        # Image should be transformed
        assert result[1] == {
            "type": "blob",
            "modality": "image",
            "mime_type": "image/jpeg",
            "content": "/9j/4AAQ",
        }

    def test_list_with_non_dict_items(self):
        """Test that non-dict items in list are kept as-is"""
        content = ["text string", 123, {"type": "text", "text": "hi"}]
        result = transform_message_content(content)

        assert result == ["text string", 123, {"type": "text", "text": "hi"}]

    def test_tuple_content(self):
        """Test that tuple content is also handled"""
        content = (
            {"type": "text", "text": "Hello"},
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/img.jpg"},
            },
        )
        result = transform_message_content(content)

        assert len(result) == 2
        assert result[0] == {"type": "text", "text": "Hello"}
        # Non-data URLs become "uri" parts with an empty mime_type.
        assert result[1] == {
            "type": "uri",
            "modality": "image",
            "mime_type": "",
            "uri": "https://example.com/img.jpg",
        }

    def test_other_types_returned_as_is(self):
        """Test that other types are returned unchanged"""
        assert transform_message_content(123) == 123
        assert transform_message_content(None) is None
        assert transform_message_content({"key": "value"}) == {"key": "value"}

    def test_mixed_content_types(self):
        """Test transforming mixed content with multiple formats"""
        # OpenAI-style (image_url), Anthropic-style (source/base64), and
        # Google-style (inline_data) parts normalized in a single pass.
        content = [
            {"type": "text", "text": "Look at these:"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,iVBORw0"},
            },
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": "/9j/4AAQ",
                },
            },
            {"inline_data": {"mime_type": "audio/wav", "data": "UklGRiQA"}},
        ]
        result = transform_message_content(content)

        assert len(result) == 4
        assert result[0] == {"type": "text", "text": "Look at these:"}
        assert result[1] == {
            "type": "blob",
            "modality": "image",
            "mime_type": "image/png",
            "content": "iVBORw0",
        }
        assert result[2] == {
            "type": "blob",
            "modality": "image",
            "mime_type": "image/jpeg",
            "content": "/9j/4AAQ",
        }
        assert result[3] == {
            "type": "blob",
            "modality": "audio",
            "mime_type": "audio/wav",
            "content": "UklGRiQA",
        }

    def test_empty_list(self):
        """Test that empty list is returned as empty list"""
        assert transform_message_content([]) == []
# NOTE(review): the remainder of this patch adds a new (non-Python) uv.lock
# stub; reproduced verbatim below.
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000000..bda0207302
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,3 @@
+version = 1
+revision = 3
+requires-python = ">=3.13"