Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
BBox,
Cookie,
Element,
LLMStepData,
LLMUsage,
LocalStorageItem,
OriginStorage,
ScreenshotConfig,
Expand Down Expand Up @@ -255,6 +257,8 @@
"TokenStats",
"ActionHistory",
"ActionTokenUsage",
"LLMStepData",
"LLMUsage",
"SnapshotOptions",
"SnapshotFilter",
"ScreenshotConfig",
Expand Down
29 changes: 28 additions & 1 deletion sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
from .models import (
EvaluateJsRequest,
EvaluateJsResult,
LLMStepData,
Snapshot,
SnapshotOptions,
TabInfo,
Expand Down Expand Up @@ -792,9 +793,26 @@ async def emit_step_end(
verify_signals: dict[str, Any] | None = None,
post_url: str | None = None,
post_snapshot_digest: str | None = None,
llm_data: dict[str, Any] | LLMStepData | None = None,
) -> dict[str, Any]:
"""
Emit a step_end event using TraceEventBuilder.

Args:
action: Action name/type executed in this step
success: Whether the action execution succeeded
error: Error message if action failed
outcome: Outcome description of the action
duration_ms: Duration of action execution in milliseconds
attempt: Attempt number (0-based)
verify_passed: Whether verification passed
verify_signals: Additional verification signals
post_url: URL after action execution
post_snapshot_digest: Digest of post-action snapshot
llm_data: LLM interaction data for this step. Can be:
- LLMStepData: Structured model with response_text, response_hash, usage, model
- dict: Raw dict with response_text, response_hash, usage keys
- None: No LLM data (defaults to empty dict)
"""
goal = self._step_goal or ""
pre_snap = self._step_pre_snapshot or self.last_snapshot
Expand Down Expand Up @@ -850,6 +868,15 @@ async def emit_step_end(
"signals": signals,
}

# Convert LLMStepData to dict if needed
llm_data_dict: dict[str, Any]
if llm_data is None:
llm_data_dict = {}
elif isinstance(llm_data, LLMStepData):
llm_data_dict = llm_data.to_trace_dict()
else:
llm_data_dict = llm_data

step_end_data = TraceEventBuilder.build_step_end_event(
step_id=self.step_id or "",
step_index=int(self.step_index),
Expand All @@ -858,7 +885,7 @@ async def emit_step_end(
pre_url=str(pre_url or ""),
post_url=str(post_url or ""),
snapshot_digest=pre_digest,
llm_data={},
llm_data=llm_data_dict,
exec_data=exec_data,
verify_data=verify_data,
pre_elements=None,
Expand Down
56 changes: 56 additions & 0 deletions sentience/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,62 @@ class ActionTokenUsage(BaseModel):
model: str


class LLMUsage(BaseModel):
    """
    Token usage for a single LLM call.

    All counters default to 0 so a partially populated usage payload
    from a provider still validates. Embedded in LLMStepData.usage and
    serialized into step_end trace events via LLMStepData.to_trace_dict().
    """

    prompt_tokens: int = Field(
        0,
        description="Number of tokens in the prompt sent to the model",
    )
    completion_tokens: int = Field(
        0,
        description="Number of tokens generated by the model",
    )
    total_tokens: int = Field(
        0,
        description="Total token count reported for the call",
    )


class LLMStepData(BaseModel):
    """
    LLM interaction data for a single step in agent traces.

    Carried in step_end trace events so that Sentience Studio can show
    the LLM response, its hash, token usage, and the model used for
    debugging and analysis. All fields are optional; anything left as
    None is simply omitted from the trace payload.
    """

    response_text: str | None = Field(
        None,
        description="The LLM's response text for this step",
    )
    response_hash: str | None = Field(
        None,
        description="SHA256 hash of response_text for deduplication/indexing",
    )
    usage: LLMUsage | None = Field(
        None,
        description="Token usage statistics for this LLM call",
    )
    model: str | None = Field(
        None,
        description="Model identifier used for this call (e.g., 'gpt-4o', 'claude-3-5-sonnet')",
    )

    def to_trace_dict(self) -> dict[str, Any]:
        """
        Convert to the dictionary format expected by TraceEventBuilder.

        Only fields that are set (not None) appear in the output; `usage`
        is flattened into a plain dict of its three token counters.

        Returns:
            Dict with any of response_text, response_hash, usage, and
            model keys, in that order.
        """
        payload: dict[str, Any] = {}
        # Optional scalar fields pass through unchanged when present.
        for field_name in ("response_text", "response_hash"):
            value = getattr(self, field_name)
            if value is not None:
                payload[field_name] = value
        usage = self.usage
        if usage is not None:
            # Flatten the nested model into the plain-dict shape the
            # trace builder expects.
            payload["usage"] = {
                "prompt_tokens": usage.prompt_tokens,
                "completion_tokens": usage.completion_tokens,
                "total_tokens": usage.total_tokens,
            }
        if self.model is not None:
            payload["model"] = self.model
        return payload


class TokenStats(BaseModel):
"""Token usage statistics for an agent session"""

Expand Down
Loading