Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,9 +612,52 @@ def _list(name: str) -> list[str]:
iframe_hits = _list("iframe_src_hits")
url_hits = _list("url_hits")
text_hits = _list("text_hits")
selector_hits = _list("selector_hits")

# If we only saw selector/script hints, treat as non-blocking.
if not iframe_hits and not url_hits and not text_hits:
return False

# Heuristic: many sites include a passive reCAPTCHA badge (v3) that should NOT block.
# We only want to block when there's evidence of an interactive challenge.
hits_all = [*iframe_hits, *url_hits, *text_hits, *selector_hits]
hits_l = [str(x).lower() for x in hits_all if x]

strong_text = any(
k in " ".join(hits_l)
for k in (
"i'm not a robot",
"verify you are human",
"human verification",
"complete the security check",
"please verify",
)
)
strong_iframe = any(
any(k in h for k in ("api2/bframe", "hcaptcha", "turnstile"))
for h in hits_l
)
strong_selector = any(
any(
k in h
for k in (
"g-recaptcha-response",
"h-captcha-response",
"cf-turnstile-response",
"recaptcha-checkbox",
"hcaptcha-checkbox",
)
)
for h in hits_l
)
only_generic = (
not strong_text
and not strong_iframe
and not strong_selector
and all(("captcha" in h or "recaptcha" in h) for h in hits_l)
)
if only_generic:
return False
confidence = getattr(captcha, "confidence", 0.0)
return confidence >= self._captcha_options.min_confidence

Expand Down
5 changes: 4 additions & 1 deletion sentience/backends/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,10 @@ async def _snapshot_via_api(

try:
api_result = await _post_snapshot_to_gateway_async(
payload, options.sentience_api_key, api_url
payload,
options.sentience_api_key,
api_url,
timeout_s=options.gateway_timeout_s,
)

# Merge API result with local data (screenshot, etc.)
Expand Down
1 change: 1 addition & 0 deletions sentience/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ class SnapshotOptions(BaseModel):
limit: int = Field(50, ge=1, le=500)
filter: SnapshotFilter | None = None
use_api: bool | None = None # Force API vs extension
gateway_timeout_s: float | None = None # Gateway snapshot timeout (seconds)
save_trace: bool = False # Save raw_elements to JSON for benchmarking/training
trace_path: str | None = None # Path to save trace (default: "trace_{timestamp}.json")
goal: str | None = None # Optional goal/task description for the snapshot
Expand Down
32 changes: 28 additions & 4 deletions sentience/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ def from_httpx(cls, e: Exception) -> "SnapshotGatewayError":
bits.append(f"err_type={type(e).__name__}")
if err_s:
bits.append(f"err={err_s}")
else:
# Some transport errors (e.g. httpx.ReadError) can stringify to "".
# Include repr() so callers can still see the exception type/shape.
try:
bits.append(f"err_repr={cls._snip(repr(e), 220)}")
except Exception:
pass
if bits:
msg = f"{msg}: " + " ".join(bits)
msg = msg + ". Try using use_api=False to use local extension instead."
Expand Down Expand Up @@ -162,6 +169,11 @@ def from_requests(cls, e: Exception) -> "SnapshotGatewayError":
bits.append(f"err_type={type(e).__name__}")
if err_s:
bits.append(f"err={err_s}")
else:
try:
bits.append(f"err_repr={cls._snip(repr(e), 220)}")
except Exception:
pass
if bits:
msg = f"{msg}: " + " ".join(bits)
msg = msg + ". Try using use_api=False to use local extension instead."
Expand Down Expand Up @@ -311,6 +323,8 @@ def _post_snapshot_to_gateway_sync(
payload: dict[str, Any],
api_key: str,
api_url: str = SENTIENCE_API_URL,
*,
timeout_s: float | None = None,
) -> dict[str, Any]:
"""
Post snapshot payload to gateway (synchronous).
Expand All @@ -326,11 +340,12 @@ def _post_snapshot_to_gateway_sync(
}

try:
timeout = 30 if timeout_s is None else float(timeout_s)
response = requests.post(
f"{api_url}/v1/snapshot",
data=payload_json,
headers=headers,
timeout=30,
timeout=timeout,
)
response.raise_for_status()
return response.json()
Expand All @@ -345,6 +360,8 @@ async def _post_snapshot_to_gateway_async(
payload: dict[str, Any],
api_key: str,
api_url: str = SENTIENCE_API_URL,
*,
timeout_s: float | None = None,
) -> dict[str, Any]:
"""
Post snapshot payload to gateway (asynchronous).
Expand All @@ -362,7 +379,8 @@ async def _post_snapshot_to_gateway_async(
"Content-Type": "application/json",
}

async with httpx.AsyncClient(timeout=30.0) as client:
timeout = 30.0 if timeout_s is None else float(timeout_s)
async with httpx.AsyncClient(timeout=timeout) as client:
try:
response = await client.post(
f"{api_url}/v1/snapshot",
Expand Down Expand Up @@ -604,7 +622,12 @@ def _snapshot_via_api(
payload = _build_snapshot_payload(raw_result, options)

try:
api_result = _post_snapshot_to_gateway_sync(payload, api_key, api_url)
api_result = _post_snapshot_to_gateway_sync(
payload,
api_key,
api_url,
timeout_s=options.gateway_timeout_s,
)

# Merge API result with local data (screenshot, etc.)
snapshot_data = _merge_api_result_with_local(api_result, raw_result)
Expand Down Expand Up @@ -923,7 +946,8 @@ async def _snapshot_via_api_async(
# Lazy import httpx - only needed for async API calls
import httpx

async with httpx.AsyncClient(timeout=30.0) as client:
timeout = 30.0 if options.gateway_timeout_s is None else float(options.gateway_timeout_s)
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(
f"{api_url}/v1/snapshot",
content=payload_json,
Expand Down
108 changes: 108 additions & 0 deletions tests/test_snapshot_gateway_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import asyncio
import importlib
import sys

snapshot_module = importlib.import_module("sentience.snapshot")
from sentience.snapshot import _post_snapshot_to_gateway_async, _post_snapshot_to_gateway_sync


class _DummyResponse:
    """Canned HTTP response returned by the fake HTTP clients in these tests.

    Exposes only the two methods the tests' fake clients need from a
    response object: ``raise_for_status()`` and ``json()``.
    """

    def raise_for_status(self):
        """Simulate a successful response: never raises, returns ``None``."""

    def json(self):
        """Return a fresh, minimal successful snapshot payload."""
        body = {
            "status": "success",
            "elements": [],
            "url": "https://example.com",
        }
        return body


def test_post_snapshot_async_uses_default_timeout(monkeypatch):
    """Async gateway post falls back to a 30.0 s client timeout.

    No ``timeout_s`` is supplied, so the stand-in for ``httpx.AsyncClient``
    is expected to be constructed with ``timeout=30.0``.
    """
    seen = {"timeout": None}

    class RecordingClient:
        """Async-context-manager stub that records its ``timeout`` argument."""

        def __init__(self, timeout):
            # Capture whatever the code under test handed to the constructor.
            seen["timeout"] = timeout

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        async def post(self, *args, **kwargs):
            # The request itself is irrelevant here; always answer with success.
            return _DummyResponse()

    # Swap the ``httpx`` entry in ``sys.modules`` for an object exposing only
    # ``AsyncClient`` (presumably resolved at call time by the function under
    # test -- confirm against its import site).
    fake_namespace = {"AsyncClient": RecordingClient}
    fake_httpx = type("DummyHttpx", (), fake_namespace)
    monkeypatch.setitem(sys.modules, "httpx", fake_httpx)

    payload = {
        "raw_elements": [],
        "url": "https://example.com",
        "viewport": None,
        "goal": None,
        "options": {},
    }
    pending = _post_snapshot_to_gateway_async(
        payload,
        "sk_test",
        "https://api.sentienceapi.com",
    )
    asyncio.run(pending)

    assert seen["timeout"] == 30.0


def test_post_snapshot_async_uses_custom_timeout(monkeypatch):
    """Async gateway post forwards an explicit ``timeout_s`` to the client.

    With ``timeout_s=12.5`` the stand-in for ``httpx.AsyncClient`` is
    expected to be constructed with ``timeout=12.5``.
    """
    seen = {"timeout": None}

    class RecordingClient:
        """Async-context-manager stub that records its ``timeout`` argument."""

        def __init__(self, timeout):
            # Capture whatever the code under test handed to the constructor.
            seen["timeout"] = timeout

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        async def post(self, *args, **kwargs):
            # The request itself is irrelevant here; always answer with success.
            return _DummyResponse()

    # Swap the ``httpx`` entry in ``sys.modules`` for an object exposing only
    # ``AsyncClient`` (presumably resolved at call time by the function under
    # test -- confirm against its import site).
    fake_namespace = {"AsyncClient": RecordingClient}
    fake_httpx = type("DummyHttpx", (), fake_namespace)
    monkeypatch.setitem(sys.modules, "httpx", fake_httpx)

    payload = {
        "raw_elements": [],
        "url": "https://example.com",
        "viewport": None,
        "goal": None,
        "options": {},
    }
    pending = _post_snapshot_to_gateway_async(
        payload,
        "sk_test",
        "https://api.sentienceapi.com",
        timeout_s=12.5,
    )
    asyncio.run(pending)

    assert seen["timeout"] == 12.5


def test_post_snapshot_sync_uses_default_timeout(monkeypatch):
    """Sync gateway post falls back to a 30 s request timeout.

    No ``timeout_s`` is supplied, so the patched ``requests.post`` is
    expected to receive ``timeout=30``.
    """
    seen = {"timeout": None}

    class FakeRequests:
        """Stand-in for the ``requests`` module exposing only ``post``."""

        @staticmethod
        def post(*args, **kwargs):
            # Record the timeout keyword (``None`` if it was not passed at all).
            seen["timeout"] = kwargs.get("timeout")
            return _DummyResponse()

    # Replace the ``requests`` attribute on the snapshot module itself so the
    # function under test talks to the fake instead of the network.
    monkeypatch.setattr(snapshot_module, "requests", FakeRequests)

    payload = {
        "raw_elements": [],
        "url": "https://example.com",
        "viewport": None,
        "goal": None,
        "options": {},
    }
    _post_snapshot_to_gateway_sync(
        payload,
        "sk_test",
        "https://api.sentienceapi.com",
    )

    assert seen["timeout"] == 30


def test_post_snapshot_sync_uses_custom_timeout(monkeypatch):
    """Sync gateway post forwards an explicit ``timeout_s`` to ``requests.post``.

    With ``timeout_s=9.0`` the patched ``requests.post`` is expected to
    receive ``timeout=9.0``.
    """
    seen = {"timeout": None}

    class FakeRequests:
        """Stand-in for the ``requests`` module exposing only ``post``."""

        @staticmethod
        def post(*args, **kwargs):
            # Record the timeout keyword (``None`` if it was not passed at all).
            seen["timeout"] = kwargs.get("timeout")
            return _DummyResponse()

    # Replace the ``requests`` attribute on the snapshot module itself so the
    # function under test talks to the fake instead of the network.
    monkeypatch.setattr(snapshot_module, "requests", FakeRequests)

    payload = {
        "raw_elements": [],
        "url": "https://example.com",
        "viewport": None,
        "goal": None,
        "options": {},
    }
    _post_snapshot_to_gateway_sync(
        payload,
        "sk_test",
        "https://api.sentienceapi.com",
        timeout_s=9.0,
    )

    assert seen["timeout"] == 9.0
5 changes: 5 additions & 0 deletions traces/test-run.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459843}
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459845}
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459846}
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459848}
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459855}
Loading