diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 71f7544a1c..e9966b48b6 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -72,6 +72,397 @@ def parse_data_uri(url: str) -> "Tuple[str, str]": return mime_type, content +def get_modality_from_mime_type(mime_type: str) -> str: + """ + Infer the content modality from a MIME type string. + + Args: + mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3") + + Returns: + One of: "image", "audio", "video", or "document" + Defaults to "image" for unknown or empty MIME types. + + Examples: + "image/jpeg" -> "image" + "audio/mp3" -> "audio" + "video/mp4" -> "video" + "application/pdf" -> "document" + "text/plain" -> "document" + """ + if not mime_type: + return "image" # Default fallback + + mime_lower = mime_type.lower() + if mime_lower.startswith("image/"): + return "image" + elif mime_lower.startswith("audio/"): + return "audio" + elif mime_lower.startswith("video/"): + return "video" + elif mime_lower.startswith("application/") or mime_lower.startswith("text/"): + return "document" + else: + return "image" # Default fallback for unknown types + + +def transform_openai_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform an OpenAI/LiteLLM content part to Sentry's standardized format. + + This handles the OpenAI image_url format used by OpenAI and LiteLLM SDKs. + + Input format: + - {"type": "image_url", "image_url": {"url": "..."}} + - {"type": "image_url", "image_url": "..."} (string shorthand) + + Output format (one of): + - {"type": "blob", "modality": "image", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "image", "mime_type": "", "uri": "..."} + + Args: + content_part: A dictionary representing a content part from OpenAI/LiteLLM + + Returns: + A transformed dictionary in standardized format, or None if the format + is not OpenAI image_url format or transformation fails. 
+ """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type != "image_url": + return None + + image_url_data = content_part.get("image_url") + if isinstance(image_url_data, str): + url = image_url_data + elif isinstance(image_url_data, dict): + url = image_url_data.get("url", "") + else: + return None + + if not url: + return None + + # Check if it's a data URI (base64 encoded) + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + # Regular URL + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + +def transform_anthropic_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform an Anthropic content part to Sentry's standardized format. + + This handles the Anthropic image and document formats with source dictionaries. 
+ + Input format: + - {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}} + - {"type": "image", "source": {"type": "url", "media_type": "...", "url": "..."}} + - {"type": "image", "source": {"type": "file", "media_type": "...", "file_id": "..."}} + - {"type": "document", "source": {...}} (same source formats) + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from Anthropic + + Returns: + A transformed dictionary in standardized format, or None if the format + is not Anthropic format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type not in ("image", "document") or "source" not in content_part: + return None + + source = content_part.get("source") + if not isinstance(source, dict): + return None + + source_type = source.get("type") + media_type = source.get("media_type", "") + modality = ( + "document" + if block_type == "document" + else get_modality_from_mime_type(media_type) + ) + + if source_type == "base64": + return { + "type": "blob", + "modality": modality, + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": modality, + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": modality, + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + + return None + + +def transform_google_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a Google GenAI content part to Sentry's standardized format. 
+ + This handles the Google GenAI inline_data and file_data formats. + + Input format: + - {"inline_data": {"mime_type": "...", "data": "..."}} + - {"file_data": {"mime_type": "...", "file_uri": "..."}} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + + Args: + content_part: A dictionary representing a content part from Google GenAI + + Returns: + A transformed dictionary in standardized format, or None if the format + is not Google format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + # Handle Google inline_data format + if "inline_data" in content_part: + inline_data = content_part.get("inline_data") + if isinstance(inline_data, dict): + mime_type = inline_data.get("mime_type", "") + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": inline_data.get("data", ""), + } + return None + + # Handle Google file_data format + if "file_data" in content_part: + file_data = content_part.get("file_data") + if isinstance(file_data, dict): + mime_type = file_data.get("mime_type", "") + return { + "type": "uri", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "uri": file_data.get("file_uri", ""), + } + return None + + return None + + +def transform_generic_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a generic/LangChain-style content part to Sentry's standardized format. + + This handles generic formats where the type indicates the modality and + the data is provided via direct base64, url, or file_id fields. 
+ + Input format: + - {"type": "image", "base64": "...", "mime_type": "..."} + - {"type": "audio", "url": "...", "mime_type": "..."} + - {"type": "video", "base64": "...", "mime_type": "..."} + - {"type": "file", "file_id": "...", "mime_type": "..."} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part in generic format + + Returns: + A transformed dictionary in standardized format, or None if the format + is not generic format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type not in ("image", "audio", "video", "file"): + return None + + # Ensure it's not Anthropic format (which also uses type: "image") + if "source" in content_part: + return None + + mime_type = content_part.get("mime_type", "") + modality = block_type if block_type != "file" else "document" + + # Check for base64 encoded content + if "base64" in content_part: + return { + "type": "blob", + "modality": modality, + "mime_type": mime_type, + "content": content_part.get("base64", ""), + } + # Check for URL reference + elif "url" in content_part: + return { + "type": "uri", + "modality": modality, + "mime_type": mime_type, + "uri": content_part.get("url", ""), + } + # Check for file_id reference + elif "file_id" in content_part: + return { + "type": "file", + "modality": modality, + "mime_type": mime_type, + "file_id": content_part.get("file_id", ""), + } + + return None + + +def transform_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a content part from various AI SDK formats to Sentry's standardized format. 
+ + This is a heuristic dispatcher that detects the format and delegates to the + appropriate SDK-specific transformer. For direct SDK integration, prefer using + the specific transformers directly: + - transform_openai_content_part() for OpenAI/LiteLLM + - transform_anthropic_content_part() for Anthropic + - transform_google_content_part() for Google GenAI + - transform_generic_content_part() for LangChain and other generic formats + + Detection order: + 1. OpenAI: type == "image_url" + 2. Google: "inline_data" or "file_data" keys present + 3. Anthropic: type in ("image", "document") with "source" key + 4. Generic: type in ("image", "audio", "video", "file") with base64/url/file_id + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from an AI SDK + + Returns: + A transformed dictionary in standardized format, or None if the format + is unrecognized or transformation fails. 
+ """ + if not isinstance(content_part, dict): + return None + + # Try OpenAI format first (most common, clear indicator) + result = transform_openai_content_part(content_part) + if result is not None: + return result + + # Try Google format (unique keys make it easy to detect) + result = transform_google_content_part(content_part) + if result is not None: + return result + + # Try Anthropic format (has "source" key) + result = transform_anthropic_content_part(content_part) + if result is not None: + return result + + # Try generic format as fallback + result = transform_generic_content_part(content_part) + if result is not None: + return result + + # Unrecognized format + return None + + +def transform_message_content(content: "Any") -> "Any": + """ + Transform message content, handling both string content and list of content blocks. + + For list content, each item is transformed using transform_content_part(). + Items that cannot be transformed (return None) are kept as-is. + + Args: + content: Message content - can be a string, list of content blocks, or other + + Returns: + - String content: returned as-is + - List content: list with each transformable item converted to standardized format + - Other: returned as-is + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + return transformed + + return content + + def _normalize_data(data: "Any", unpack: bool = True) -> "Any": # convert pydantic data (e.g. 
OpenAI v1+) to json compatible format if hasattr(data, "model_dump"): @@ -107,6 +498,46 @@ def set_data_normalized( span.set_data(key, json.dumps(normalized)) +def extract_response_output( + output_items: "Any", +) -> "Tuple[List[Any], List[Dict[str, Any]]]": + """ + Extract response text and tool calls from OpenAI Responses API output. + + This handles the output format from OpenAI's Responses API where each output + item has a `type` field that can be "message" or "function_call". + + Args: + output_items: Iterable of output items from the response + + Returns: + Tuple of (response_texts, tool_calls) where: + - response_texts: List of text strings or dicts for unknown message types + - tool_calls: List of tool call dicts + """ + response_texts = [] # type: List[Any] + tool_calls = [] # type: List[Dict[str, Any]] + + for output in output_items: + if output.type == "function_call": + if hasattr(output, "model_dump"): + tool_calls.append(output.model_dump()) + elif hasattr(output, "dict"): + tool_calls.append(output.dict()) + elif output.type == "message": + for output_message in output.content: + try: + response_texts.append(output_message.text) + except AttributeError: + # Unknown output message type, just return the json + if hasattr(output_message, "model_dump"): + response_texts.append(output_message.model_dump()) + elif hasattr(output_message, "dict"): + response_texts.append(output_message.dict()) + + return response_texts, tool_calls + + def normalize_message_role(role: str) -> str: """ Normalize a message role to one of the 4 allowed gen_ai role values. 
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 66dc4a1c48..2b016be374 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -5,8 +5,10 @@ from sentry_sdk import consts from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import ( + extract_response_output, set_data_normalized, normalize_message_roles, + transform_openai_content_part, truncate_and_annotate_messages, ) from sentry_sdk.consts import SPANDATA @@ -203,6 +205,21 @@ def _set_input_data( and integration.include_prompts ): normalized_messages = normalize_message_roles(messages) + # Transform content parts to standardized format using OpenAI-specific transformer + for message in normalized_messages: + if isinstance(message, dict) and "content" in message: + content = message["content"] + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_openai_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + message["content"] = transformed + scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: @@ -265,14 +282,45 @@ def _set_output_data( if hasattr(response, "choices"): if should_send_default_pii() and integration.include_prompts: - response_text = [ - choice.message.model_dump() - for choice in response.choices - if choice.message is not None - ] + response_text = [] # type: list[str] + tool_calls = [] # type: list[Any] + + for choice in response.choices: + if choice.message is None: + continue + + # Extract text content + content = getattr(choice.message, "content", None) + if content is not None: + response_text.append(content) + + # Extract audio transcript if available + audio = getattr(choice.message, 
"audio", None) + if audio is not None: + transcript = getattr(audio, "transcript", None) + if transcript is not None: + response_text.append(transcript) + + # Extract tool calls + message_tool_calls = getattr(choice.message, "tool_calls", None) + if message_tool_calls is not None: + for tool_call in message_tool_calls: + if hasattr(tool_call, "model_dump"): + tool_calls.append(tool_call.model_dump()) + elif hasattr(tool_call, "dict"): + tool_calls.append(tool_call.dict()) + if len(response_text) > 0: set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_text) + if len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + _calculate_token_usage(messages, response, span, None, integration.count_tokens) if finish_span: @@ -280,34 +328,18 @@ def _set_output_data( elif hasattr(response, "output"): if should_send_default_pii() and integration.include_prompts: - output_messages: "dict[str, list[Any]]" = { - "response": [], - "tool": [], - } - - for output in response.output: - if output.type == "function_call": - output_messages["tool"].append(output.dict()) - elif output.type == "message": - for output_message in output.content: - try: - output_messages["response"].append(output_message.text) - except AttributeError: - # Unknown output message type, just return the json - output_messages["response"].append(output_message.dict()) - - if len(output_messages["tool"]) > 0: + response_texts, tool_calls = extract_response_output(response.output) + + if len(tool_calls) > 0: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - output_messages["tool"], + tool_calls, unpack=False, ) - if len(output_messages["response"]) > 0: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] - ) + if len(response_texts) > 0: + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_texts) _calculate_token_usage(messages, response, span, None, 
integration.count_tokens) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index c3a3a04dc9..1e2d7e758c 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -3,6 +3,7 @@ get_start_span_function, set_data_normalized, normalize_message_roles, + normalize_message_role, truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA @@ -10,7 +11,11 @@ from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN -from ..utils import _set_agent_data, _set_usage_data +from ..utils import ( + _set_agent_data, + _set_usage_data, + _transform_openai_agents_message_content, +) from typing import TYPE_CHECKING @@ -49,17 +54,40 @@ def invoke_agent_span( original_input = kwargs.get("original_input") if original_input is not None: - message = ( - original_input - if isinstance(original_input, str) - else safe_serialize(original_input) - ) - messages.append( - { - "content": [{"text": message, "type": "text"}], - "role": "user", - } - ) + if isinstance(original_input, str): + # String input: wrap in text block + messages.append( + { + "content": [{"text": original_input, "type": "text"}], + "role": "user", + } + ) + elif isinstance(original_input, list) and len(original_input) > 0: + # Check if list contains message objects (with type="message") + # or content parts (input_text, input_image, etc.) 
+ first_item = original_input[0] + if isinstance(first_item, dict) and first_item.get("type") == "message": + # List of message objects - process each individually + for msg in original_input: + if isinstance(msg, dict) and msg.get("type") == "message": + role = normalize_message_role(msg.get("role", "user")) + content = msg.get("content") + transformed = _transform_openai_agents_message_content( + content + ) + if isinstance(transformed, str): + transformed = [{"text": transformed, "type": "text"}] + elif not isinstance(transformed, list): + transformed = [ + {"text": str(transformed), "type": "text"} + ] + messages.append({"content": transformed, "role": role}) + else: + # List of content parts - transform and wrap as user message + content = _transform_openai_agents_message_content(original_input) + if not isinstance(content, list): + content = [{"text": str(content), "type": "text"}] + messages.append({"content": content, "role": "user"}) if len(messages) > 0: normalized_messages = normalize_message_roles(messages) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index a24d0e909d..afa16dc609 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -1,7 +1,9 @@ import sentry_sdk from sentry_sdk.ai.utils import ( GEN_AI_ALLOWED_MESSAGE_ROLES, + extract_response_output, normalize_message_roles, + parse_data_uri, set_data_normalized, normalize_message_role, truncate_and_annotate_messages, @@ -27,6 +29,133 @@ raise DidNotEnable("OpenAI Agents not installed") +def _transform_openai_agents_content_part( + content_part: "dict[str, Any]", +) -> "dict[str, Any]": + """ + Transform an OpenAI Agents content part to Sentry-compatible format. 
+ + Handles multimodal content (images, audio, files) by converting them + to the standardized format: + - base64 encoded data -> type: "blob" + - URL references -> type: "uri" + - file_id references -> type: "file" + """ + if not isinstance(content_part, dict): + return content_part + + part_type = content_part.get("type") + + # Handle input_text (OpenAI Agents SDK text format) -> normalize to standard text format + if part_type == "input_text": + return { + "type": "text", + "text": content_part.get("text", ""), + } + + # Handle image_url (OpenAI vision format) and input_image (OpenAI Agents SDK format) + if part_type in ("image_url", "input_image"): + # Get URL from either format + if part_type == "image_url": + image_url = content_part.get("image_url") or {} + url = ( + image_url.get("url", "") + if isinstance(image_url, dict) + else str(image_url) + ) + else: + # input_image format has image_url directly + url = content_part.get("image_url") or "" + + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": "image", + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + # Handle input_audio (OpenAI audio input format) + if part_type == "input_audio": + input_audio = content_part.get("input_audio") or {} + if isinstance(input_audio, dict): + audio_format = input_audio.get("format", "") + mime_type = f"audio/{audio_format}" if audio_format else "" + return { + "type": "blob", + "modality": "audio", + "mime_type": mime_type, + "content": input_audio.get("data", ""), + } + else: + return content_part + + # Handle image_file (Assistants API file-based images) + if part_type == "image_file": + image_file = content_part.get("image_file") or {} + if 
isinstance(image_file, dict): + return { + "type": "file", + "modality": "image", + "mime_type": "", + "file_id": image_file.get("file_id", ""), + } + else: + return content_part + + # Handle file (document attachments) + if part_type == "file": + file_data = content_part.get("file") or {} + if isinstance(file_data, dict): + return { + "type": "file", + "modality": "document", + "mime_type": "", + "file_id": file_data.get("file_id", ""), + } + else: + return content_part + + return content_part + + +def _transform_openai_agents_message_content(content: "Any") -> "Any": + """ + Transform OpenAI Agents message content, handling both string content and + list of content parts. + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + transformed.append(_transform_openai_agents_content_part(item)) + else: + transformed.append(item) + return transformed + + return content + + def _capture_exception(exc: "Any") -> None: set_span_errored() @@ -128,13 +257,15 @@ def _set_input_data( if "role" in message: normalized_role = normalize_message_role(message.get("role")) content = message.get("content") + # Transform content to handle multimodal data (images, audio, files) + transformed_content = _transform_openai_agents_message_content(content) request_messages.append( { "role": normalized_role, "content": ( - [{"type": "text", "text": content}] - if isinstance(content, str) - else content + [{"type": "text", "text": transformed_content}] + if isinstance(transformed_content, str) + else transformed_content ), } ) @@ -170,31 +301,13 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: if not should_send_default_pii(): return - output_messages: "dict[str, list[Any]]" = { - "response": [], - "tool": [], - } + response_texts, tool_calls = extract_response_output(result.output) - for output in result.output: - if output.type == "function_call": 
- output_messages["tool"].append(output.dict()) - elif output.type == "message": - for output_message in output.content: - try: - output_messages["response"].append(output_message.text) - except AttributeError: - # Unknown output message type, just return the json - output_messages["response"].append(output_message.dict()) - - if len(output_messages["tool"]) > 0: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"]) - ) + if len(tool_calls) > 0: + span.set_data(SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(tool_calls)) - if len(output_messages["response"]) > 0: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] - ) + if len(response_texts) > 0: + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_texts) def _create_mcp_execute_tool_spans( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 814289c887..2c1e32b1e4 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -17,6 +17,10 @@ from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice +from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, + Function as ToolCallFunction, +) from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage SKIP_RESPONSES_TESTS = False @@ -44,7 +48,7 @@ OpenAIIntegration, _calculate_token_usage, ) -from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES, transform_message_content from sentry_sdk._types import AnnotatedValue from sentry_sdk.serializer import serialize @@ -1509,6 +1513,127 @@ def test_openai_message_role_mapping(sentry_init, capture_events): assert "ai" not in roles +def 
test_transform_message_content_image_url_to_blob(): + """Test that OpenAI image_url message parts are correctly converted to blob format""" + content = [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==", + "detail": "high", + }, + }, + ] + + converted = transform_message_content(content) + + assert isinstance(converted, list) + assert len(converted) == 2 + + # First item (text) should remain unchanged + assert converted[0] == { + "text": "How many ponies do you see in the image?", + "type": "text", + } + + # Second item (image_url) should be converted to blob format + blob_item = converted[1] + assert blob_item["type"] == "blob" + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/jpeg" + assert blob_item["content"] == "/9j/4AAQSkZJRg==" + # Verify the original image_url structure is replaced + assert "image_url" not in blob_item + + +def test_transform_message_content_image_url_to_uri(): + """Test that OpenAI image_url with non-data URLs are converted to uri format""" + content = [ + { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg", + "detail": "low", + }, + }, + ] + + converted = transform_message_content(content) + + assert len(converted) == 1 + uri_item = converted[0] + assert uri_item["type"] == "uri" + assert uri_item["uri"] == "https://example.com/image.jpg" + # Verify the original image_url structure is replaced + assert "image_url" not in uri_item + + +def test_transform_message_content_malformed_data_uri(): + """Test that malformed data URIs are handled gracefully without crashing""" + content = [ + { + "type": "image_url", + "image_url": { + # Malformed: missing ;base64, and comma separator + "url": "data:image/jpeg", + }, + }, + ] + + # Should not raise an exception + converted = transform_message_content(content) + + assert len(converted) == 1 + # Malformed data 
URI should fall back to uri type + item = converted[0] + assert item["type"] == "uri" + assert item["uri"] == "data:image/jpeg" + assert item["modality"] == "image" + + +def test_transform_message_content_image_url_as_string(): + """Test that image_url as a string (instead of dict) is handled gracefully""" + content = [ + { + "type": "image_url", + # Some implementations pass image_url as a string directly + "image_url": "https://example.com/image.jpg", + }, + ] + + # Should not raise an exception + converted = transform_message_content(content) + + assert len(converted) == 1 + item = converted[0] + assert item["type"] == "uri" + assert item["modality"] == "image" + assert item["uri"] == "https://example.com/image.jpg" + + +def test_transform_message_content_image_url_as_string_data_uri(): + """Test that image_url as a data URI string is correctly converted to blob""" + content = [ + { + "type": "image_url", + "image_url": "data:image/png;base64,iVBORw0KGgo=", + }, + ] + + converted = transform_message_content(content) + + assert len(converted) == 1 + item = converted[0] + assert item["type"] == "blob" + assert item["modality"] == "image" + assert item["mime_type"] == "image/png" + assert item["content"] == "iVBORw0KGgo=" + + def test_openai_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( @@ -1559,3 +1684,262 @@ def test_openai_message_truncation(sentry_init, capture_events): if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] assert "len" in messages_meta.get("", {}) + + +def test_response_text_is_string_not_dict(sentry_init, capture_events): + """Test that gen_ai.response.text is a string, not a message dict. + + With set_data_normalized, a single-element list is unpacked to the element, + so ["the model response"] becomes just "the model response". 
+ """ + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + span = event["spans"][0] + + # Verify response text is in span data + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + + response_text = span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + + # For a single response, set_data_normalized unpacks the list, so it's the string directly + assert isinstance(response_text, str) + assert response_text == "the model response" + + # Make sure it's NOT a JSON string containing a dict (the old buggy format) + # The old format was like '{"content": "...", "role": "assistant", ...}' + try: + parsed = json.loads(response_text) + # If it parses as JSON, it should NOT be a dict + assert not isinstance(parsed, dict), "Response text should not be a dict" + except json.JSONDecodeError: + # If it's not valid JSON, that's fine - it's just the raw string + pass + + +def test_chat_completion_with_tool_calls(sentry_init, capture_events): + """Test that tool calls are properly extracted to gen_ai.response.tool_calls.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create a response with tool calls using proper OpenAI types + tool_call_response = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="tool_calls", + message=ChatCompletionMessage( + role="assistant", + content=None, # Content is None when there are tool calls + tool_calls=[ + ChatCompletionMessageToolCall( + id="call_123", + type="function", + function=ToolCallFunction( + 
name="get_weather", + arguments='{"location": "Paris"}', + ), + ), + ], + ), + ) + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), + ) + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=tool_call_response) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "What's the weather in Paris?"}], + ) + + (event,) = events + span = event["spans"][0] + + # Response text should NOT be present when content is None + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + # Tool calls should be extracted + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in span["data"] + tool_calls_data = span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + + # Should be serialized as JSON + assert isinstance(tool_calls_data, str) + parsed_tool_calls = json.loads(tool_calls_data) + + assert isinstance(parsed_tool_calls, list) + assert len(parsed_tool_calls) == 1 + assert parsed_tool_calls[0]["id"] == "call_123" + assert parsed_tool_calls[0]["type"] == "function" + assert parsed_tool_calls[0]["function"]["name"] == "get_weather" + + +def test_chat_completion_with_content_and_tool_calls(sentry_init, capture_events): + """Test that both content and tool calls are captured when both are present.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create a response with both content and tool calls using proper OpenAI types + response_with_both = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="tool_calls", + message=ChatCompletionMessage( + role="assistant", + content="I'll check the weather for you.", + tool_calls=[ + ChatCompletionMessageToolCall( + id="call_456", + type="function", + 
function=ToolCallFunction( + name="get_weather", + arguments='{"location": "London"}', + ), + ), + ], + ), + ) + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=15, + prompt_tokens=25, + total_tokens=40, + ), + ) + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=response_with_both) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "What's the weather in London?"}], + ) + + (event,) = events + span = event["spans"][0] + + # Both should be present + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in span["data"] + + # Verify response text - single element list gets unpacked to the element + response_text = span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert response_text == "I'll check the weather for you." + + # Verify tool calls - single element list gets unpacked, then re-serialized as JSON + tool_calls_data = span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + assert isinstance(tool_calls_data, str) + tool_calls = json.loads(tool_calls_data) + assert isinstance(tool_calls, list) + assert len(tool_calls) == 1 + assert tool_calls[0]["function"]["name"] == "get_weather" + + +def test_chat_completion_multiple_choices(sentry_init, capture_events): + """Test that multiple choices are all captured in the response text.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create a response with multiple choices + multi_choice_response = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="Response option 1" + ), + ), + Choice( + index=1, + finish_reason="stop", + message=ChatCompletionMessage( + 
role="assistant", content="Response option 2" + ), + ), + Choice( + index=2, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="Response option 3" + ), + ), + ], + created=10000000, + model="response-model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=30, + prompt_tokens=20, + total_tokens=50, + ), + ) + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=multi_choice_response) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=[{"role": "user", "content": "Give me options"}], + n=3, + ) + + (event,) = events + span = event["spans"][0] + + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + response_text = json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]) + + # Should have all 3 responses as strings + assert len(response_text) == 3 + assert response_text[0] == "Response option 1" + assert response_text[1] == "Response option 2" + assert response_text[2] == "Response option 3" + + # All should be strings + for item in response_text: + assert isinstance(item, str) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9d463f8de5..74a800fb55 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -9,7 +9,12 @@ from sentry_sdk import start_span from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration -from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize +from sentry_sdk.integrations.openai_agents.utils import ( + _set_input_data, + safe_serialize, + _transform_openai_agents_content_part, + _transform_openai_agents_message_content, +) from sentry_sdk.utils import parse_version from openai import AsyncOpenAI @@ -2123,3 +2128,162 @@ def 
test_openai_agents_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 2 assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) + + +def test_transform_does_not_modify_original(): + """Test that transformation does not modify the original content.""" + import copy + + content_part = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD", + "detail": "high", + }, + } + original = copy.deepcopy(content_part) + _transform_openai_agents_content_part(content_part) + assert content_part == original, "Original content_part should not be modified" + + content = [ + {"type": "text", "text": "What is in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + }, + }, + ] + original_content = copy.deepcopy(content) + _transform_openai_agents_message_content(content) + assert content == original_content, "Original content list should not be modified" + + +def test_transform_handles_none_values(): + """Test that transformation handles None values gracefully without crashing.""" + # input_image with image_url explicitly set to None - should not crash + content_part = {"type": "input_image", "image_url": None} + result = _transform_openai_agents_content_part(content_part) + assert result == {"type": "uri", "modality": "image", "mime_type": "", "uri": ""} + + # image_url with nested dict set to None - should not crash + content_part = {"type": "image_url", "image_url": None} + result = _transform_openai_agents_content_part(content_part) + assert result == {"type": "uri", "modality": "image", "mime_type": "", "uri": ""} + + # input_audio with None value - gracefully returns empty blob + content_part = {"type": "input_audio", "input_audio": None} + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "", + 
"content": "", + } + + # image_file with None value - gracefully returns empty file reference + content_part = {"type": "image_file", "image_file": None} + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "file", + "modality": "image", + "mime_type": "", + "file_id": "", + } + + # file with None value - gracefully returns empty file reference + content_part = {"type": "file", "file": None} + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "file", + "modality": "document", + "mime_type": "", + "file_id": "", + } + + +def test_transform_image_url_to_blob(): + """Test that OpenAI image_url with data URI is converted to blob format.""" + content_part = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD", + "detail": "high", + }, + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRgABAQAAAQABAAD", + } + + +def test_transform_image_url_to_uri(): + """Test that OpenAI image_url with HTTP URL is converted to uri format.""" + content_part = { + "type": "image_url", + "image_url": { + "url": "https://example.com/image.jpg", + "detail": "low", + }, + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + +def test_transform_message_content_with_image(): + """Test that message content with image is properly transformed.""" + content = [ + {"type": "text", "text": "What is in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + }, + }, + ] + result = _transform_openai_agents_message_content(content) + assert len(result) == 2 + assert result[0] == {"type": "text", "text": "What is in this image?"} + assert 
result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgoAAAANSUhEUg==", + } + + +def test_transform_input_image_to_blob(): + """Test that OpenAI Agents SDK input_image format is converted to blob format.""" + # OpenAI Agents SDK uses input_image type with image_url as a direct string + content_part = { + "type": "input_image", + "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==", + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgoAAAANSUhEUg==", + } + + +def test_transform_input_text_to_text(): + """Test that OpenAI Agents SDK input_text format is normalized to text format.""" + content_part = { + "type": "input_text", + "text": "Hello, world!", + } + result = _transform_openai_agents_content_part(content_part) + assert result == { + "type": "text", + "text": "Hello, world!", + } diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 1ff354f473..f6852d54bb 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -19,6 +19,13 @@ _find_truncation_index, parse_data_uri, redact_blob_message_parts, + get_modality_from_mime_type, + transform_openai_content_part, + transform_anthropic_content_part, + transform_google_content_part, + transform_generic_content_part, + transform_content_part, + transform_message_content, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -842,3 +849,906 @@ def test_handles_uri_without_data_prefix(self): assert mime_type == "image/jpeg" assert content == "/9j/4AAQ" + + +class TestGetModalityFromMimeType: + def test_image_mime_types(self): + """Test that image MIME types return 'image' modality""" + assert get_modality_from_mime_type("image/jpeg") == "image" + assert get_modality_from_mime_type("image/png") == "image" + assert get_modality_from_mime_type("image/gif") 
== "image" + assert get_modality_from_mime_type("image/webp") == "image" + assert get_modality_from_mime_type("IMAGE/JPEG") == "image" # case insensitive + + def test_audio_mime_types(self): + """Test that audio MIME types return 'audio' modality""" + assert get_modality_from_mime_type("audio/mp3") == "audio" + assert get_modality_from_mime_type("audio/wav") == "audio" + assert get_modality_from_mime_type("audio/ogg") == "audio" + assert get_modality_from_mime_type("AUDIO/MP3") == "audio" # case insensitive + + def test_video_mime_types(self): + """Test that video MIME types return 'video' modality""" + assert get_modality_from_mime_type("video/mp4") == "video" + assert get_modality_from_mime_type("video/webm") == "video" + assert get_modality_from_mime_type("video/quicktime") == "video" + assert get_modality_from_mime_type("VIDEO/MP4") == "video" # case insensitive + + def test_document_mime_types(self): + """Test that application and text MIME types return 'document' modality""" + assert get_modality_from_mime_type("application/pdf") == "document" + assert get_modality_from_mime_type("application/json") == "document" + assert get_modality_from_mime_type("text/plain") == "document" + assert get_modality_from_mime_type("text/html") == "document" + + def test_empty_mime_type_returns_image(self): + """Test that empty MIME type defaults to 'image'""" + assert get_modality_from_mime_type("") == "image" + + def test_none_mime_type_returns_image(self): + """Test that None-like values default to 'image'""" + assert get_modality_from_mime_type(None) == "image" + + def test_unknown_mime_type_returns_image(self): + """Test that unknown MIME types default to 'image'""" + assert get_modality_from_mime_type("unknown/type") == "image" + assert get_modality_from_mime_type("custom/format") == "image" + + +class TestTransformOpenAIContentPart: + """Tests for the OpenAI-specific transform function.""" + + def test_image_url_with_data_uri(self): + """Test transforming OpenAI 
image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + def test_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_openai_content_part(content_part) is None + + def test_non_image_url_type_returns_none(self): + """Test that non-image_url types return None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_openai_content_part(content_part) is None + + def 
test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_openai_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_openai_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_openai_content_part("string") is None + assert transform_openai_content_part(123) is None + assert transform_openai_content_part(None) is None + + +class TestTransformAnthropicContentPart: + """Tests for the Anthropic-specific transform function.""" + + def test_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = 
transform_anthropic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + def test_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_anthropic_content_part(content_part) is None + + def test_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_missing_source_returns_none(self): + """Test that Anthropic format without source returns None""" + content_part = {"type": "image", "data": "something"} + assert transform_anthropic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": 
"https://example.com"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_anthropic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_anthropic_content_part("string") is None + assert transform_anthropic_content_part(123) is None + assert transform_anthropic_content_part(None) is None + + +class TestTransformGoogleContentPart: + """Tests for the Google GenAI-specific transform function.""" + + def test_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert 
transform_google_content_part(content_part) is None + + def test_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_google_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_google_content_part(content_part) is None + + def test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_google_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_google_content_part("string") is None + assert transform_google_content_part(123) is None + assert transform_google_content_part(None) is None + + +class TestTransformGenericContentPart: + """Tests for the generic/LangChain-style transform function.""" + + def test_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_file_with_file_id(self): + """Test 
transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + def test_image_with_source_returns_none(self): + """Test that image with source key (Anthropic style) returns None""" + # This is Anthropic format, should NOT be handled by generic + content_part = { + "type": "image", + "source": {"type": "base64", "data": "abc"}, + } + assert transform_generic_content_part(content_part) is None + + def test_text_type_returns_none(self): + """Test that text type returns None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_generic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_generic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_generic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_generic_content_part("string") is None + assert transform_generic_content_part(123) is None + assert transform_generic_content_part(None) is None + + def 
test_missing_data_key_returns_none(self): + """Test that missing data key (base64/url/file_id) returns None""" + content_part = {"type": "image", "mime_type": "image/jpeg"} + assert transform_generic_content_part(content_part) is None + + +class TestTransformContentPart: + # OpenAI/LiteLLM format tests + def test_openai_image_url_with_data_uri(self): + """Test transforming OpenAI image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_openai_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + # Anthropic format tests + def test_anthropic_image_base64(self): + """Test 
transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_anthropic_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_anthropic_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_anthropic_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_anthropic_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": 
"document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + # Google format tests + def test_google_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_google_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_google_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + # Generic format tests (LangChain style) + def test_generic_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_generic_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": 
"https://example.com/audio.mp3", + } + + def test_generic_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_generic_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + # Edge cases and error handling + def test_text_block_returns_none(self): + """Test that text blocks return None (not transformed)""" + content_part = {"type": "text", "text": "Hello world"} + result = transform_content_part(content_part) + + assert result is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_content_part("string") is None + assert transform_content_part(123) is None + assert transform_content_part(None) is None + assert transform_content_part([1, 2, 3]) is None + + def test_empty_dict_returns_none(self): + """Test that empty dict returns None""" + assert transform_content_part({}) is None + + def test_unknown_type_returns_none(self): + """Test that unknown type returns None""" + content_part = {"type": "unknown", "data": "something"} + assert transform_content_part(content_part) is None + + def test_openai_image_url_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_content_part(content_part) is None + + def test_anthropic_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" 
+ content_part = {"type": "image", "source": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_anthropic_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_content_part(content_part) is None + + def test_google_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_google_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + +class TestTransformMessageContent: + def test_string_content_returned_as_is(self): + """Test that string content is returned unchanged""" + content = "Hello, world!" + result = transform_message_content(content) + + assert result == "Hello, world!" 
    def test_list_with_transformable_items(self):
        """Test transforming a list with transformable content parts"""
        content = [
            {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ"},
            },
        ]
        result = transform_message_content(content)

        assert len(result) == 2
        # Text block should be unchanged (transform returns None, so original kept)
        assert result[0] == {"type": "text", "text": "What's in this image?"}
        # Image should be transformed
        assert result[1] == {
            "type": "blob",
            "modality": "image",
            "mime_type": "image/jpeg",
            "content": "/9j/4AAQ",
        }

    def test_list_with_non_dict_items(self):
        """Test that non-dict items in list are kept as-is"""
        content = ["text string", 123, {"type": "text", "text": "hi"}]
        result = transform_message_content(content)

        assert result == ["text string", 123, {"type": "text", "text": "hi"}]

    def test_tuple_content(self):
        """Test that tuple content is also handled"""
        content = (
            {"type": "text", "text": "Hello"},
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/img.jpg"},
            },
        )
        result = transform_message_content(content)

        assert len(result) == 2
        assert result[0] == {"type": "text", "text": "Hello"}
        # Non-data URLs become "uri" parts with an empty mime_type.
        assert result[1] == {
            "type": "uri",
            "modality": "image",
            "mime_type": "",
            "uri": "https://example.com/img.jpg",
        }

    def test_other_types_returned_as_is(self):
        """Test that other types are returned unchanged"""
        assert transform_message_content(123) == 123
        assert transform_message_content(None) is None
        assert transform_message_content({"key": "value"}) == {"key": "value"}

    def test_mixed_content_types(self):
        """Test transforming mixed content with multiple formats"""
        # OpenAI-style (image_url), Anthropic-style (source/base64), and
        # Google-style (inline_data) parts normalized in a single pass.
        content = [
            {"type": "text", "text": "Look at these:"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,iVBORw0"},
            },
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": "/9j/4AAQ",
                },
            },
            {"inline_data": {"mime_type": "audio/wav", "data": "UklGRiQA"}},
        ]
        result = transform_message_content(content)

        assert len(result) == 4
        assert result[0] == {"type": "text", "text": "Look at these:"}
        assert result[1] == {
            "type": "blob",
            "modality": "image",
            "mime_type": "image/png",
            "content": "iVBORw0",
        }
        assert result[2] == {
            "type": "blob",
            "modality": "image",
            "mime_type": "image/jpeg",
            "content": "/9j/4AAQ",
        }
        assert result[3] == {
            "type": "blob",
            "modality": "audio",
            "mime_type": "audio/wav",
            "content": "UklGRiQA",
        }

    def test_empty_list(self):
        """Test that empty list is returned as empty list"""
        assert transform_message_content([]) == []
# NOTE(review): the remainder of this patch adds a new (non-Python) uv.lock
# stub; reproduced verbatim below.
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000000..bda0207302
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,3 @@
+version = 1
+revision = 3
+requires-python = ">=3.13"