Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
BBox,
Cookie,
Element,
LLMStepData,
LLMUsage,
LocalStorageItem,
OriginStorage,
ScreenshotConfig,
Expand Down Expand Up @@ -255,6 +257,8 @@
"TokenStats",
"ActionHistory",
"ActionTokenUsage",
"LLMStepData",
"LLMUsage",
"SnapshotOptions",
"SnapshotFilter",
"ScreenshotConfig",
Expand Down
29 changes: 28 additions & 1 deletion sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
from .models import (
EvaluateJsRequest,
EvaluateJsResult,
LLMStepData,
Snapshot,
SnapshotOptions,
TabInfo,
Expand Down Expand Up @@ -792,9 +793,26 @@ async def emit_step_end(
verify_signals: dict[str, Any] | None = None,
post_url: str | None = None,
post_snapshot_digest: str | None = None,
llm_data: dict[str, Any] | LLMStepData | None = None,
) -> dict[str, Any]:
"""
Emit a step_end event using TraceEventBuilder.

Args:
action: Action name/type executed in this step
success: Whether the action execution succeeded
error: Error message if action failed
outcome: Outcome description of the action
duration_ms: Duration of action execution in milliseconds
attempt: Attempt number (0-based)
verify_passed: Whether verification passed
verify_signals: Additional verification signals
post_url: URL after action execution
post_snapshot_digest: Digest of post-action snapshot
llm_data: LLM interaction data for this step. Can be:
- LLMStepData: Structured model with response_text, response_hash, usage, model
- dict: Raw dict with response_text, response_hash, usage keys
- None: No LLM data (defaults to empty dict)
"""
goal = self._step_goal or ""
pre_snap = self._step_pre_snapshot or self.last_snapshot
Expand Down Expand Up @@ -850,6 +868,15 @@ async def emit_step_end(
"signals": signals,
}

# Convert LLMStepData to dict if needed
llm_data_dict: dict[str, Any]
if llm_data is None:
llm_data_dict = {}
elif isinstance(llm_data, LLMStepData):
llm_data_dict = llm_data.to_trace_dict()
else:
llm_data_dict = llm_data

step_end_data = TraceEventBuilder.build_step_end_event(
step_id=self.step_id or "",
step_index=int(self.step_index),
Expand All @@ -858,7 +885,7 @@ async def emit_step_end(
pre_url=str(pre_url or ""),
post_url=str(post_url or ""),
snapshot_digest=pre_digest,
llm_data={},
llm_data=llm_data_dict,
exec_data=exec_data,
verify_data=verify_data,
pre_elements=None,
Expand Down
56 changes: 56 additions & 0 deletions sentience/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,62 @@ class ActionTokenUsage(BaseModel):
model: str


class LLMUsage(BaseModel):
    """
    Token usage for a single LLM call.

    All counters default to 0 so a partially populated usage payload
    from a provider still validates. Embedded in LLMStepData.usage and
    serialized into step_end trace events via LLMStepData.to_trace_dict().
    """

    prompt_tokens: int = Field(
        0,
        description="Number of tokens in the prompt sent to the model",
    )
    completion_tokens: int = Field(
        0,
        description="Number of tokens generated by the model",
    )
    total_tokens: int = Field(
        0,
        description="Total token count reported for the call",
    )


class LLMStepData(BaseModel):
    """
    LLM interaction data for a single step in agent traces.

    Carried in step_end trace events so that Sentience Studio can show
    the LLM response, its hash, token usage, and the model used for
    debugging and analysis. All fields are optional; anything left as
    None is simply omitted from the trace payload.
    """

    response_text: str | None = Field(
        None,
        description="The LLM's response text for this step",
    )
    response_hash: str | None = Field(
        None,
        description="SHA256 hash of response_text for deduplication/indexing",
    )
    usage: LLMUsage | None = Field(
        None,
        description="Token usage statistics for this LLM call",
    )
    model: str | None = Field(
        None,
        description="Model identifier used for this call (e.g., 'gpt-4o', 'claude-3-5-sonnet')",
    )

    def to_trace_dict(self) -> dict[str, Any]:
        """
        Convert to the dictionary format expected by TraceEventBuilder.

        Only fields that are set (not None) appear in the output; `usage`
        is flattened into a plain dict of its three token counters.

        Returns:
            Dict with any of response_text, response_hash, usage, and
            model keys, in that order.
        """
        payload: dict[str, Any] = {}
        # Optional scalar fields pass through unchanged when present.
        for field_name in ("response_text", "response_hash"):
            value = getattr(self, field_name)
            if value is not None:
                payload[field_name] = value
        usage = self.usage
        if usage is not None:
            # Flatten the nested model into the plain-dict shape the
            # trace builder expects.
            payload["usage"] = {
                "prompt_tokens": usage.prompt_tokens,
                "completion_tokens": usage.completion_tokens,
                "total_tokens": usage.total_tokens,
            }
        if self.model is not None:
            payload["model"] = self.model
        return payload


class TokenStats(BaseModel):
"""Token usage statistics for an agent session"""

Expand Down
Loading