From c6e9543bbdb615c4a7924335ab0a819956ed92b3 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Wed, 4 Feb 2026 22:25:09 -0800
Subject: [PATCH] Add gateway_timeout_s snapshot option and do not block on passive CAPTCHA badges

---
 sentience/agent_runtime.py             |  43 ++++++++++
 sentience/backends/snapshot.py         |   5 +-
 sentience/models.py                    |   1 +
 sentience/snapshot.py                  |  32 +++++++-
 tests/test_snapshot_gateway_timeout.py | 108 +++++++++++++++++++++++++
 traces/test-run.jsonl                  |   5 ++
 6 files changed, 189 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_snapshot_gateway_timeout.py

diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index b7cb4a3..a48fc3c 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -612,9 +612,52 @@ def _list(name: str) -> list[str]:
             iframe_hits = _list("iframe_src_hits")
             url_hits = _list("url_hits")
             text_hits = _list("text_hits")
+            selector_hits = _list("selector_hits")
+
             # If we only saw selector/script hints, treat as non-blocking.
             if not iframe_hits and not url_hits and not text_hits:
                 return False
+
+            # Heuristic: many sites include a passive reCAPTCHA badge (v3) that should NOT
+            # block. Only block when some hit shows evidence of an interactive challenge.
+            hits_l = [
+                str(x).lower()
+                for x in (*iframe_hits, *url_hits, *text_hits, *selector_hits)
+                if x
+            ]
+            # Join once rather than once per keyword. The iframe/selector markers contain
+            # no space, so they can never match across the " " separator: searching the
+            # joined text is the same as testing every hit individually.
+            haystack = " ".join(hits_l)
+
+            def _any_marker(markers: tuple[str, ...]) -> bool:
+                return any(m in haystack for m in markers)
+
+            strong_text = _any_marker(
+                (
+                    "i'm not a robot",
+                    "verify you are human",
+                    "human verification",
+                    "complete the security check",
+                    "please verify",
+                )
+            )
+            strong_iframe = _any_marker(("api2/bframe", "hcaptcha", "turnstile"))
+            strong_selector = _any_marker(
+                (
+                    "g-recaptcha-response",
+                    "h-captcha-response",
+                    "cf-turnstile-response",
+                    "recaptcha-checkbox",
+                    "hcaptcha-checkbox",
+                )
+            )
+            # "recaptcha" contains "captcha", so one substring test covers both.
+            only_generic = not (strong_text or strong_iframe or strong_selector) and all(
+                "captcha" in h for h in hits_l
+            )
+            if only_generic:
+                return False
         confidence = getattr(captcha, "confidence", 0.0)
         return confidence >= self._captcha_options.min_confidence
 
diff --git a/sentience/backends/snapshot.py b/sentience/backends/snapshot.py
index b09b1cb..f81b363 100644
--- a/sentience/backends/snapshot.py
+++ b/sentience/backends/snapshot.py
@@ -595,7 +595,10 @@ async def _snapshot_via_api(
 
     try:
         api_result = await _post_snapshot_to_gateway_async(
-            payload, options.sentience_api_key, api_url
+            payload,
+            options.sentience_api_key,
+            api_url,
+            timeout_s=options.gateway_timeout_s,
         )
 
         # Merge API result with local data (screenshot, etc.)
diff --git a/sentience/models.py b/sentience/models.py
index f1d9a6e..07c062c 100644
--- a/sentience/models.py
+++ b/sentience/models.py
@@ -776,6 +776,7 @@ class SnapshotOptions(BaseModel):
     limit: int = Field(50, ge=1, le=500)
     filter: SnapshotFilter | None = None
     use_api: bool | None = None  # Force API vs extension
+    gateway_timeout_s: float | None = Field(None, gt=0)  # Gateway timeout (s), > 0; None = 30 s
     save_trace: bool = False  # Save raw_elements to JSON for benchmarking/training
     trace_path: str | None = None  # Path to save trace (default: "trace_{timestamp}.json")
     goal: str | None = None  # Optional goal/task description for the snapshot
diff --git a/sentience/snapshot.py b/sentience/snapshot.py
index e0393fd..73e334c 100644
--- a/sentience/snapshot.py
+++ b/sentience/snapshot.py
@@ -106,6 +106,13 @@ def from_httpx(cls, e: Exception) -> "SnapshotGatewayError":
             bits.append(f"err_type={type(e).__name__}")
             if err_s:
                 bits.append(f"err={err_s}")
+            else:
+                # Some transport errors (e.g. httpx.ReadError) can stringify to "".
+                # Include repr() so callers can still see the exception type/shape.
+                try:
+                    bits.append(f"err_repr={cls._snip(repr(e), 220)}")
+                except Exception:
+                    pass
         if bits:
             msg = f"{msg}: " + " ".join(bits)
         msg = msg + ". Try using use_api=False to use local extension instead."
@@ -162,6 +169,11 @@ def from_requests(cls, e: Exception) -> "SnapshotGatewayError":
             bits.append(f"err_type={type(e).__name__}")
             if err_s:
                 bits.append(f"err={err_s}")
+            else:
+                try:
+                    bits.append(f"err_repr={cls._snip(repr(e), 220)}")
+                except Exception:
+                    pass
         if bits:
             msg = f"{msg}: " + " ".join(bits)
         msg = msg + ". Try using use_api=False to use local extension instead."
@@ -311,6 +323,8 @@ def _post_snapshot_to_gateway_sync(
     payload: dict[str, Any],
     api_key: str,
     api_url: str = SENTIENCE_API_URL,
+    *,
+    timeout_s: float | None = None,
 ) -> dict[str, Any]:
     """
     Post snapshot payload to gateway (synchronous).
@@ -326,11 +340,12 @@ def _post_snapshot_to_gateway_sync(
     }
 
     try:
+        timeout = 30 if timeout_s is None else float(timeout_s)
         response = requests.post(
             f"{api_url}/v1/snapshot",
             data=payload_json,
             headers=headers,
-            timeout=30,
+            timeout=timeout,
         )
         response.raise_for_status()
         return response.json()
@@ -345,6 +360,8 @@ async def _post_snapshot_to_gateway_async(
     payload: dict[str, Any],
     api_key: str,
     api_url: str = SENTIENCE_API_URL,
+    *,
+    timeout_s: float | None = None,
 ) -> dict[str, Any]:
     """
     Post snapshot payload to gateway (asynchronous).
@@ -362,7 +379,8 @@ async def _post_snapshot_to_gateway_async(
         "Content-Type": "application/json",
     }
 
-    async with httpx.AsyncClient(timeout=30.0) as client:
+    timeout = 30.0 if timeout_s is None else float(timeout_s)
+    async with httpx.AsyncClient(timeout=timeout) as client:
         try:
             response = await client.post(
                 f"{api_url}/v1/snapshot",
@@ -604,7 +622,12 @@ def _snapshot_via_api(
     payload = _build_snapshot_payload(raw_result, options)
 
     try:
-        api_result = _post_snapshot_to_gateway_sync(payload, api_key, api_url)
+        api_result = _post_snapshot_to_gateway_sync(
+            payload,
+            api_key,
+            api_url,
+            timeout_s=options.gateway_timeout_s,
+        )
 
         # Merge API result with local data (screenshot, etc.)
         snapshot_data = _merge_api_result_with_local(api_result, raw_result)
@@ -923,7 +946,8 @@ async def _snapshot_via_api_async(
         # Lazy import httpx - only needed for async API calls
         import httpx
 
-        async with httpx.AsyncClient(timeout=30.0) as client:
+        timeout = 30.0 if options.gateway_timeout_s is None else float(options.gateway_timeout_s)
+        async with httpx.AsyncClient(timeout=timeout) as client:
             response = await client.post(
                 f"{api_url}/v1/snapshot",
                 content=payload_json,
diff --git a/tests/test_snapshot_gateway_timeout.py b/tests/test_snapshot_gateway_timeout.py
new file mode 100644
index 0000000..d115878
--- /dev/null
+++ b/tests/test_snapshot_gateway_timeout.py
@@ -0,0 +1,108 @@
+"""Tests that the snapshot gateway POST helpers honour ``timeout_s``."""
+
+import asyncio
+import importlib
+import sys
+
+snapshot_module = importlib.import_module("sentience.snapshot")
+from sentience.snapshot import _post_snapshot_to_gateway_async, _post_snapshot_to_gateway_sync
+
+_API_KEY = "sk_test"
+_API_URL = "https://api.sentienceapi.com"
+
+
+def _payload():
+    """Return a fresh minimal snapshot payload so tests never share a dict."""
+    return {
+        "raw_elements": [],
+        "url": "https://example.com",
+        "viewport": None,
+        "goal": None,
+        "options": {},
+    }
+
+
+class _DummyResponse:
+    """Successful HTTP response stub returned by both fake transports."""
+
+    def raise_for_status(self):
+        return None
+
+    def json(self):
+        return {"status": "success", "elements": [], "url": "https://example.com"}
+
+
+def _install_dummy_httpx(monkeypatch):
+    """Swap ``httpx`` in ``sys.modules`` for a fake and return its client class.
+
+    The client class stores the ``timeout`` it was constructed with in
+    ``last_timeout``. A new class is created per call, so tests share no state.
+    """
+
+    class DummyClient:
+        last_timeout = None
+
+        def __init__(self, timeout):
+            DummyClient.last_timeout = timeout
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return None
+
+        async def post(self, *args, **kwargs):
+            return _DummyResponse()
+
+    dummy_httpx = type("DummyHttpx", (), {"AsyncClient": DummyClient})
+    monkeypatch.setitem(sys.modules, "httpx", dummy_httpx)
+    return DummyClient
+
+
+def _install_dummy_requests(monkeypatch):
+    """Replace ``requests`` on the snapshot module with a fake and return it.
+
+    The fake stores the ``timeout`` keyword of the latest ``post`` call in
+    ``last_timeout``.
+    """
+
+    class DummyRequests:
+        last_timeout = None
+
+        @staticmethod
+        def post(*args, **kwargs):
+            DummyRequests.last_timeout = kwargs.get("timeout")
+            return _DummyResponse()
+
+    monkeypatch.setattr(snapshot_module, "requests", DummyRequests)
+    return DummyRequests
+
+
+def test_post_snapshot_async_uses_default_timeout(monkeypatch):
+    """Without ``timeout_s`` the async client is created with 30.0 seconds."""
+    client_cls = _install_dummy_httpx(monkeypatch)
+    asyncio.run(_post_snapshot_to_gateway_async(_payload(), _API_KEY, _API_URL))
+    assert client_cls.last_timeout == 30.0
+
+
+def test_post_snapshot_async_uses_custom_timeout(monkeypatch):
+    """An explicit ``timeout_s`` is what the async client is created with."""
+    client_cls = _install_dummy_httpx(monkeypatch)
+    asyncio.run(
+        _post_snapshot_to_gateway_async(_payload(), _API_KEY, _API_URL, timeout_s=12.5)
+    )
+    assert client_cls.last_timeout == 12.5
+
+
+def test_post_snapshot_sync_uses_default_timeout(monkeypatch):
+    """Without ``timeout_s`` ``requests.post`` receives a 30 second timeout."""
+    fake_requests = _install_dummy_requests(monkeypatch)
+    _post_snapshot_to_gateway_sync(_payload(), _API_KEY, _API_URL)
+    assert fake_requests.last_timeout == 30
+
+
+def test_post_snapshot_sync_uses_custom_timeout(monkeypatch):
+    """An explicit ``timeout_s`` is forwarded to ``requests.post``."""
+    fake_requests = _install_dummy_requests(monkeypatch)
+    _post_snapshot_to_gateway_sync(_payload(), _API_KEY, _API_URL, timeout_s=9.0)
+    assert fake_requests.last_timeout == 9.0
diff --git a/traces/test-run.jsonl b/traces/test-run.jsonl
index e69de29..e1982f7 100644
--- a/traces/test-run.jsonl
+++ b/traces/test-run.jsonl
@@ -0,0 +1,5 @@
+{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459843}
+{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459845}
+{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459846}
+{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459848}
+{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459855}