diff --git a/CHANGELOG.md b/CHANGELOG.md index 91d6f00..bf45ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,35 @@ All notable changes to the Sentience Python SDK will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### 2026-02-13 + +#### Expanded deterministic verifications (adaptive resnapshotting) + +When you use `.eventually()` for deterministic checks, you can now **automatically increase the snapshot element limit across retries**. This helps on long / virtualized pages where a small snapshot limit can miss the target element, causing a false failure. + +- **AgentRuntime verifications**: `AssertionHandle.eventually(..., snapshot_limit_growth=...)` +- **Expect-style verifications**: `with_eventually(..., snapshot_limit_growth=...)` +- **Commit**: `59125ce19001c457336dccbb3c9463560bd00245` + +**Example** + +```python +from predicate.verification import exists + +# Grow snapshot limit on each retry until the element appears. 
+await dbg.check(exists("text~'Checkout'"), label="checkout_visible", required=True).eventually( + timeout_s=12, + snapshot_limit_growth={ + "start_limit": 60, + "step": 40, + "max_limit": 220, + "apply_on": "only_on_fail", # default; or "all" + }, +) +``` + ## [0.12.0] - 2025-12-26 ### Added diff --git a/predicate/backends/__init__.py b/predicate/backends/__init__.py index ed6725c..b5a193e 100644 --- a/predicate/backends/__init__.py +++ b/predicate/backends/__init__.py @@ -97,7 +97,13 @@ ) from .playwright_backend import PlaywrightBackend from .protocol import BrowserBackend, LayoutMetrics, ViewportInfo -from .sentience_context import SentienceContext, SentienceContextState, TopElementSelector +from .sentience_context import ( + PredicateContext, + PredicateContextState, + SentienceContext, + SentienceContextState, + TopElementSelector, +) from .snapshot import CachedSnapshot, snapshot __all__ = [ @@ -117,6 +123,9 @@ # SentienceContext (Token-Slasher Context Middleware) "SentienceContext", "SentienceContextState", + # PredicateContext (rebrand alias) + "PredicateContext", + "PredicateContextState", "TopElementSelector", # Backend-agnostic functions "snapshot", diff --git a/predicate/backends/sentience_context.py b/predicate/backends/sentience_context.py index 548c557..d24f8ae 100644 --- a/predicate/backends/sentience_context.py +++ b/predicate/backends/sentience_context.py @@ -6,9 +6,9 @@ Example usage: from browser_use import Agent - from predicate.backends import SentienceContext + from predicate.backends import PredicateContext - ctx = SentienceContext(show_overlay=True) + ctx = PredicateContext(show_overlay=True) state = await ctx.build(agent.browser_session, goal="Click the first Show HN post") if state: agent.add_context(state.prompt_block) # or however browser-use injects state @@ -469,3 +469,12 @@ def _compress_href(self, href: str | None) -> str: pass return "item" + + +# --------------------------------------------------------------------------- +# 
def __getattr__(name: str) -> Any:  # pragma: no cover
    """
    Lazily resolve the public plugin names on first attribute access.

    Module-level ``__getattr__`` (PEP 562) is only consulted when normal
    attribute lookup on this package fails, so the ``.plugin`` submodule is
    not imported until one of the names listed in ``__all__`` is requested.

    Args:
        name: Attribute being looked up on this package.

    Returns:
        The object of the same name defined in the ``.plugin`` submodule.

    Raises:
        AttributeError: If ``name`` is not one of the names in ``__all__``.
    """
    if name not in __all__:
        # Mirror the interpreter's own wording so tracebacks stay familiar.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Local import keeps linting/packaging robust: nothing from `.plugin`
    # is loaded at package import time.
    import importlib

    plugin_module = importlib.import_module(".plugin", __name__)
    return getattr(plugin_module, name)
class PredicateBrowserUsePlugin:
    """
    Browser Use "plugin" for Predicate deterministic verification.

    Integration surfaces:
    - lifecycle hooks: pass `plugin.on_step_start` / `plugin.on_step_end` to `agent.run(...)`
    - step wrapper: `await plugin.wrap_step(agent, agent.step)` for manual step loops
    - optional tools: `plugin.register_tools(Tools())`

    Attributes:
        config: Plugin configuration; a default `PredicateBrowserUsePluginConfig()`
            is created when none is supplied.
        runtime: `AgentRuntime` created by `bind()`; `None` until bound.
        dbg: `PredicateDebugger` wrapping `runtime`; `None` until bound.
    """

    def __init__(self, *, config: PredicateBrowserUsePluginConfig | None = None) -> None:
        self.config = config or PredicateBrowserUsePluginConfig()

        # Guards bind() so overlapping hook/tool calls do not bind concurrently.
        self._lock = asyncio.Lock()

        self._bound_session: Any | None = None
        self.runtime: AgentRuntime | None = None
        self.dbg: PredicateDebugger | None = None

        # Best-effort step counter if Browser Use does not expose one
        self._step_counter = 0

    async def bind(self, *, browser_session: Any) -> None:
        """
        Bind plugin to a Browser Use BrowserSession.

        Creates a backend via BrowserUseAdapter and wires AgentRuntime + PredicateDebugger.
        Safe to call multiple times; rebinds if the session object changed.

        Args:
            browser_session: The Browser Use session object to bind to.

        Raises:
            ValueError: If `browser_session` is None.
            RuntimeError: If every bind attempt failed; the last underlying
                exception is attached as `__cause__`.
        """
        async with self._lock:
            if browser_session is None:
                raise ValueError("browser_session is required")

            already_bound = (
                self._bound_session is browser_session
                and self.runtime is not None
                and self.dbg is not None
            )
            if already_bound:
                return

            # Lazy import so predicate can be imported without browser-use installed.
            from predicate.backends import BrowserUseAdapter

            # Same number of tries as before: the first try plus max(1, bind_retries) retries.
            total_attempts = max(1, int(self.config.bind_retries)) + 1
            last_err: Exception | None = None
            for attempt in range(1, total_attempts + 1):
                try:
                    adapter = BrowserUseAdapter(browser_session)
                    backend = await adapter.create_backend()

                    tracer = self.config.tracer
                    if tracer is None:
                        run_id = self.config.run_id or str(uuid.uuid4())
                        tracer = Tracer(run_id=run_id, sink=_NoopTraceSink())

                    # Ensure snapshot options carry credentials and use_api policy.
                    snap_opts = self._effective_snapshot_options()
                    self.runtime = AgentRuntime(
                        backend=backend,
                        tracer=tracer,
                        snapshot_options=snap_opts,
                        predicate_api_key=self.config.predicate_api_key,
                    )
                    self.dbg = PredicateDebugger(runtime=self.runtime, auto_step=True)

                    self._bound_session = browser_session
                    return
                except Exception as e:  # pragma: no cover (backend-specific)
                    last_err = e
                    # Attempts are 1-based, so this guard really fires on the
                    # last try: no pointless sleep right before raising.
                    if attempt >= total_attempts:
                        break
                    # Linear backoff between tries: 0.5s, 1.0s, ...
                    await asyncio.sleep(0.5 * attempt)

            raise RuntimeError(f"Failed to bind PredicateBrowserUsePlugin: {last_err}") from last_err

    def _effective_snapshot_options(self) -> SnapshotOptions:
        """
        Build the snapshot options actually used, without mutating the config.

        Copies `config.snapshot_options`, then layers on the plugin-level
        settings: the API key is written to both `predicate_api_key` and
        `sentience_api_key`, having a key turns `use_api` on when it is unset,
        and an explicit `config.use_api` overrides whatever is there.
        """
        effective = SnapshotOptions(**self.config.snapshot_options.model_dump())

        api_key = self.config.predicate_api_key
        if api_key:
            effective.predicate_api_key = api_key
            effective.sentience_api_key = api_key
            if effective.use_api is None:
                effective.use_api = True

        if self.config.use_api is not None:
            effective.use_api = bool(self.config.use_api)
        return effective

    async def _maybe_get_current_url(self, agent: Any) -> str | None:
        """
        Best-effort lookup of the current page URL from the agent's session.

        Calls `agent.browser_session.get_current_page_url()` when present and
        awaits the result if it is a coroutine.

        Returns:
            The URL as a string, or None when the agent has no session, the
            session has no such callable, the call fails, or it yields None.
        """
        session = getattr(agent, "browser_session", None)
        if session is None:
            return None
        fn = getattr(session, "get_current_page_url", None)
        if not callable(fn):
            return None
        try:
            value = fn()
            if asyncio.iscoroutine(value):
                value = await value
            # Coerce on both paths, and never turn a missing URL into "None".
            return None if value is None else str(value)
        except Exception:
            return None

    async def _wait_for_extension_ready(self, *, timeout_ms: int) -> None:
        """
        Wait until window.sentience.snapshot is available.

        Polls the page roughly every 0.25s via `backend.eval`, bounding each
        evaluation to 2s so a stuck evaluation cannot block the hook.

        Args:
            timeout_ms: Overall budget in milliseconds (negative is treated as 0).

        Raises:
            TimeoutError: If the check never evaluated truthy within the budget.
        """
        assert self.runtime is not None
        backend = self.runtime.backend
        deadline = time.monotonic() + max(0.0, float(timeout_ms) / 1000.0)

        async def _eval_with_timeout(expr: str, *, timeout_s: float = 2.0) -> Any:
            # asyncio.wait() returns after the timeout without waiting for the
            # cancelled task to finish unwinding.
            task = asyncio.create_task(backend.eval(expr))
            done, _pending = await asyncio.wait({task}, timeout=timeout_s)
            if task not in done:
                task.cancel()
                return "__EVAL_TIMEOUT__"
            try:
                return task.result()
            except Exception:
                return "__EVAL_ERROR__"

        last = None
        while time.monotonic() <= deadline:
            # Best-effort refresh execution context to avoid stale observations.
            try:
                reset = getattr(backend, "reset_execution_context", None)
                if callable(reset):
                    reset()
            except Exception:
                pass

            last = await _eval_with_timeout(
                "typeof window.sentience !== 'undefined' && typeof window.sentience.snapshot === 'function'"
            )
            if last not in ("__EVAL_TIMEOUT__", "__EVAL_ERROR__", False, None):
                return
            await asyncio.sleep(0.25)

        raise TimeoutError(
            f"Predicate extension not ready after {timeout_ms}ms (last={last})"
        )

    async def on_step_start(self, agent: Any) -> None:
        """
        Browser Use lifecycle hook: called at the beginning of each agent step.

        Binds to the agent's session if needed, opens a runtime step whose goal
        is the agent task (suffixed with the current URL when known), then
        waits best-effort for the extension to become ready.

        Raises:
            RuntimeError: If the agent has no `browser_session`, or binding fails.
        """
        session = getattr(agent, "browser_session", None)
        if session is None:
            raise RuntimeError("Browser Use agent has no `browser_session` attribute")

        await self.bind(browser_session=session)
        assert self.runtime is not None

        url = await self._maybe_get_current_url(agent)
        task = getattr(agent, "task", None)
        goal = str(task) if task is not None else "browser_use_step"
        if url:
            goal = f"{goal} @ {url}"

        # Keep steps stable even if Browser Use doesn't expose a step index.
        # The local counter is bookkeeping only; the index passed on is
        # derived from the runtime's own step_index.
        self._step_counter += 1
        self.runtime.begin_step(goal=goal, step_index=self.runtime.step_index + 1)

        # Best-effort readiness (avoid flakiness right after navigation).
        try:
            await self._wait_for_extension_ready(timeout_ms=int(self.config.wait_for_extension_ms))
        except Exception:
            # Non-fatal: snapshot() will retry; hook should not deadlock.
            pass

    async def wrap_step(
        self,
        agent: Any,
        step_coro: Awaitable[Any] | Callable[[], Awaitable[Any]],
    ) -> Any:
        """
        Convenience wrapper for Browser Use `agent.step()` flows.

        Browser Use step hooks are wired into `agent.run(...)`, but `agent.step()` does
        not accept hook parameters. This helper provides the same behavior:

        - await plugin.on_step_start(agent)
        - await agent.step()
        - await plugin.on_step_end(agent)

        It guarantees `on_step_end` runs via a `finally` block.

        Args:
            agent: The Browser Use agent passed through to both hooks.
            step_coro: Either an awaitable, or a zero-argument callable
                returning one (e.g. `agent.step`).

        Returns:
            Whatever the awaited step produced.
        """
        await self.on_step_start(agent)
        try:
            if callable(step_coro):
                return await step_coro()
            return await step_coro
        finally:
            await self.on_step_end(agent)

    async def _run_auto_check(self, spec: StepCheckSpec, results: list[AssertionResult]) -> None:
        """
        Run one configured auto-check and append exactly one entry to `results`.

        Raises:
            PredicateBrowserUseVerificationError: If a required check evaluated falsy.
            Exception: Whatever the check itself raised, after recording it.
        """
        assert self.dbg is not None
        try:
            handle = self.dbg.check(spec.predicate, label=spec.label, required=spec.required)
            if spec.eventually:
                ok = await handle.eventually(
                    timeout_s=spec.timeout_s,
                    poll_s=spec.poll_s,
                    max_snapshot_attempts=spec.max_snapshot_attempts,
                    min_confidence=spec.min_confidence,
                )
            else:
                ok = handle.once()
        except Exception as e:
            results.append(
                AssertionResult(
                    passed=False,
                    reason=str(e),
                    details={"label": spec.label, "error_type": type(e).__name__},
                )
            )
            raise

        # `.once()` / `.eventually()` return booleans; they do not raise on failure.
        # For required checks we treat a `False` result as a hard failure.
        passed = bool(ok)
        failure = f"Required check failed: {spec.label}" if spec.required and not passed else ""
        results.append(AssertionResult(passed=passed, reason=failure, details={"label": spec.label}))
        if failure:
            # Raised outside the try above so the failure is recorded once,
            # not a second time by the error handler.
            raise PredicateBrowserUseVerificationError(failure, results=results)

    async def on_step_end(self, agent: Any) -> None:
        """
        Browser Use lifecycle hook: called at the end of each agent step.

        Optionally takes a snapshot and runs the configured auto-checks, always
        tries to emit the step-end trace event, then applies
        `config.on_failure`:

        - "log": swallow the failure (it is only reported via `emit_step_end`).
        - "pause": call `agent.pause()` (awaited if it returns a coroutine);
          if the agent has no usable `pause`, fall back to raising.
        - "raise" (default): raise.

        Raises:
            RuntimeError: If the plugin is unbound and the agent has no `browser_session`.
            PredicateBrowserUseVerificationError: On failure under the
                "raise" policy, or "pause" when pausing was not possible.
        """
        if self.runtime is None or self.dbg is None:
            # Bind lazily if hook is used standalone.
            session = getattr(agent, "browser_session", None)
            if session is None:
                raise RuntimeError("Browser Use agent has no `browser_session` attribute")
            await self.bind(browser_session=session)

        assert self.runtime is not None and self.dbg is not None

        results: list[AssertionResult] = []
        err: Exception | None = None
        try:
            if self.config.auto_snapshot_each_step:
                # Avoid injecting a very long Browser Use task string as the snapshot goal.
                # Callers can set `config.snapshot_options.goal` if they want goal-aware ranking.
                await self.dbg.snapshot()

            if self.config.auto_checks_each_step and self.config.auto_checks:
                for spec in self.config.auto_checks:
                    await self._run_auto_check(spec, results)
        except Exception as e:
            err = e
        finally:
            # Always attempt to close the step for trace completeness.
            try:
                await self.runtime.emit_step_end(
                    success=(err is None),
                    error=str(err) if err else None,
                )
            except Exception:
                # Deliberately best-effort: tracing must not mask the real outcome.
                pass

        if err is None or self.config.on_failure == "log":
            return

        if self.config.on_failure == "pause":
            pause = getattr(agent, "pause", None)
            if callable(pause):
                try:
                    outcome = pause()
                    if asyncio.iscoroutine(outcome):
                        # An async pause() only takes effect once awaited.
                        await outcome
                    return
                except Exception:
                    # Pausing failed; fall through and surface the failure.
                    pass

        # "raise" policy, or "pause" without a working pause().
        if isinstance(err, PredicateBrowserUseVerificationError):
            raise err
        raise PredicateBrowserUseVerificationError(str(err), results=results) from err

    # ---------------------------------------------------------------------
    # Optional tools integration
    # ---------------------------------------------------------------------

    def register_tools(self, tools: Any) -> None:
        """
        Register Browser Use tools for explicit deterministic checks.

        This method must be called by user code that constructs `Tools()`.
        Three actions are registered through `tools.action(...)`: a snapshot
        tool, a URL-contains check and a selector-exists check.

        Args:
            tools: Object exposing an `action(description)` decorator.

        Raises:
            ImportError: If `browser_use` cannot be imported or lacks
                `ActionResult` / `BrowserSession`. This is checked before `tools`.
            ValueError: If `tools` is None.
        """
        # Import browser-use types lazily; keep this optional.
        try:
            import importlib

            browser_use = importlib.import_module("browser_use")
            ActionResult = getattr(browser_use, "ActionResult", None)
            BrowserSession = getattr(browser_use, "BrowserSession", None)
            if ActionResult is None or BrowserSession is None:
                raise ImportError("browser_use.ActionResult/BrowserSession not available")
        except Exception as e:  # pragma: no cover
            raise ImportError(
                "browser-use is required to register tools. Install with `predicatelabs[browser-use]`."
            ) from e

        if tools is None:
            raise ValueError("tools is required")

        @tools.action("Predicate: take a snapshot for deterministic verification")
        async def predicate_snapshot(  # type: ignore
            browser_session: Any,  # noqa: ARG001 (injected by browser-use)
            label: str | None = None,
            limit: int | None = None,
            screenshot: bool | None = None,
            show_overlay: bool | None = None,
        ) -> Any:
            await self.bind(browser_session=browser_session)
            assert self.runtime is not None
            # Start from the merged defaults, then apply per-call overrides.
            opts = self._effective_snapshot_options()
            if label:
                opts.goal = label
            if limit is not None:
                opts.limit = int(limit)
            if screenshot is not None:
                opts.screenshot = bool(screenshot)
            if show_overlay is not None:
                opts.show_overlay = bool(show_overlay)
            snap = await self.runtime.snapshot(**opts.model_dump(exclude_none=True))
            return ActionResult(
                extracted_content=f"snapshot_ok url={snap.url} elements={len(snap.elements)}"
            )

        @tools.action("Predicate: deterministic check that URL contains text")
        async def predicate_check_url_contains(  # type: ignore
            text: str,
            browser_session: Any,
            label: str | None = None,
            required: bool = True,
            eventually: bool = True,
            timeout_s: float = 10.0,
        ) -> Any:
            from predicate.verification import url_contains

            await self.bind(browser_session=browser_session)
            assert self.dbg is not None
            lbl = label or f"url_contains:{text}"
            h = self.dbg.check(url_contains(text), label=lbl, required=bool(required))
            ok = await h.eventually(timeout_s=float(timeout_s)) if eventually else h.once()
            return ActionResult(extracted_content=f"check_ok={ok} label={lbl}")

        @tools.action("Predicate: deterministic check that selector exists")
        async def predicate_check_exists(  # type: ignore
            selector: str,
            browser_session: Any,
            label: str | None = None,
            required: bool = True,
            eventually: bool = True,
            timeout_s: float = 10.0,
        ) -> Any:
            from predicate.verification import exists

            await self.bind(browser_session=browser_session)
            assert self.dbg is not None
            lbl = label or f"exists:{selector}"
            h = self.dbg.check(exists(selector), label=lbl, required=bool(required))
            ok = await h.eventually(timeout_s=float(timeout_s)) if eventually else h.once()
            return ActionResult(extracted_content=f"check_ok={ok} label={lbl}")

    # ---------------------------------------------------------------------
    # Helpers for docs/tests (bounded summaries)
    # ---------------------------------------------------------------------

    @staticmethod
    def summarize_snapshot(snap: Snapshot, *, max_elements: int = 20) -> BrowserState:
        """
        Condense a snapshot into a bounded `BrowserState`.

        Attributes are read defensively with `getattr`, so missing fields fall
        back to defaults (`id=-1`, `role=""`, others `None`).

        Args:
            snap: Snapshot to summarize.
            max_elements: Maximum number of leading elements to keep
                (negative values are treated as 0).
        """
        keep = max(0, int(max_elements))
        elements = list(getattr(snap, "elements", []) or [])[:keep]
        summaries: list[ElementSummary] = [
            ElementSummary(
                id=int(getattr(e, "id", -1)),
                role=str(getattr(e, "role", "")),
                text=getattr(e, "text", None),
                importance=getattr(e, "importance", None),
                bbox=getattr(e, "bbox", None),
            )
            for e in elements
        ]
        return BrowserState(url=str(getattr(snap, "url", "")), elements=summaries)
def test_register_tools_requires_browser_use(monkeypatch):
    """register_tools() raises ImportError when `browser_use` cannot be imported."""
    from predicate.integrations.browser_use.plugin import PredicateBrowserUsePlugin

    plugin = PredicateBrowserUsePlugin()

    # Capture the real function BEFORE patching. Looking it up through
    # `importlib.import_module` inside the fake would resolve to the fake
    # itself and recurse for every module other than `browser_use`.
    real_import_module = importlib.import_module

    def _fake_import_module(name: str, *args, **kwargs):
        if name == "browser_use":
            raise ImportError("browser_use not installed")
        return real_import_module(name, *args, **kwargs)

    monkeypatch.setattr(importlib, "import_module", _fake_import_module)

    # `tools=None` is fine here: the browser-use import is checked first.
    with pytest.raises(ImportError):
        plugin.register_tools(tools=None)
@pytest.mark.asyncio
async def test_on_step_end_raises_on_required_auto_check_false():
    """A required auto-check whose `.once()` returns False must raise under the "raise" policy."""
    from predicate.integrations.browser_use.plugin import (
        PredicateBrowserUsePlugin,
        PredicateBrowserUsePluginConfig,
        PredicateBrowserUseVerificationError,
        StepCheckSpec,
    )

    class _AlwaysFalseHandle:
        # Stand-in for the assertion handle: the one-shot check never passes.
        def once(self) -> bool:
            return False

    class _StubDebugger:
        async def snapshot(self):
            return None

        def check(self, _pred, label: str, required: bool = False):
            # The plugin must forward the spec's label/required flags untouched.
            assert label == "req"
            assert required is True
            return _AlwaysFalseHandle()

    class _StubRuntime:
        async def emit_step_end(self, **_kw):
            return {}

    required_spec = StepCheckSpec(
        predicate=lambda _ctx: None,
        label="req",
        required=True,
        eventually=False,
    )
    cfg = PredicateBrowserUsePluginConfig(
        auto_snapshot_each_step=False,
        auto_checks_each_step=True,
        auto_checks=[required_spec],
        on_failure="raise",
    )
    plugin = PredicateBrowserUsePlugin(config=cfg)
    # Pre-populate runtime/dbg so on_step_end skips the lazy bind path.
    plugin.runtime = _StubRuntime()  # type: ignore[assignment]
    plugin.dbg = _StubDebugger()  # type: ignore[assignment]

    with pytest.raises(PredicateBrowserUseVerificationError):
        await plugin.on_step_end(agent=object())