chrome-service: switch to CDP + persistent profile + hourly snapshot pipeline
The chrome-service stack ran `playwright launch-server`, which creates
ephemeral browser contexts per `connect()`. Despite the encrypted PVC
mounted at /profile, no chromium user-data ever persisted — only npm
cache + fontconfig. Logging in via noVNC was effectively a no-op.
Refactor:
- Replace launch-server with direct chromium (TCP CDP on :9223 internal),
fronted by a Python HTTP+WS bridge on :9222 that rewrites the Host
header to bypass Chrome's hardcoded DNS-rebinding protection (no
`--remote-allow-hosts` flag exists in stock Chrome 130; verified by
binary string grep). The bridge also forces `Connection: close` on HTTP
responses so Node's ws client opens a fresh TCP connection for the WS
upgrade rather than trying to reuse the dead keep-alive socket.
- Add `--user-data-dir=/profile/chromium-data` so cookies/localStorage
actually persist on the encrypted PVC.
- New snapshot-server sidecar (stdlib python HTTP) serves
GET /api/snapshot at chrome.viktorbarzin.me/api/snapshot,
bearer-token-gated by the existing api_bearer_token.
- New chrome-service-snapshot-harvester CronJob (hourly) connects via
CDP, dumps storage_state() (cookies + localStorage), writes atomically
to /profile/snapshots/storage-state.json.
- NetworkPolicy: TCP/9222 (was :3000), TCP/8088 added for traefik.
Caller migration:
- f1-stream: `chromium.connect(ws_url)` → `chromium.connect_over_cdp(cdp_url)`,
env var CHROME_WS_URL → CHROME_CDP_URL. CHROME_WS_TOKEN dropped (no
longer used by code; the ExternalSecret is kept because the snapshot
endpoint reuses the same Vault key).
Dev-box side (out of scope for this commit — see ~/.config/systemd/user/):
- playwright-mcp.service flips to `--isolated --storage-state=...`
so per-Claude-Code-session ephemeral contexts seed from the snapshot.
- playwright-snapshot-refresh.{service,timer} (hourly) pulls the
snapshot via the bearer-gated HTTPS endpoint.
Docs updated:
- docs/architecture/chrome-service.md — new architecture diagram + wire protocol.
- docs/runbooks/chrome-service-snapshot.md — day-2 ops (refresh, rotation,
failure modes, restore).
- stacks/chrome-service/README.md — connect_over_cdp recipe.
Design spec at docs/superpowers/specs/2026-06-04-playwright-per-session-browser-design.md.
This commit is contained in:
parent
b64d8d6168
commit
deede6dd11
10 changed files with 1152 additions and 177 deletions
|
|
@ -86,12 +86,16 @@ def _looks_like_hls_playlist(url: str) -> bool:
|
|||
return bool(_HLS_URL_RE.search(url))
|
||||
|
||||
|
||||
def _resolve_chrome_ws() -> str | None:
|
||||
base = os.getenv("CHROME_WS_URL")
|
||||
token = os.getenv("CHROME_WS_TOKEN")
|
||||
if not base or not token:
|
||||
return None
|
||||
return f"{base.rstrip('/')}/{token}"
|
||||
def _resolve_chrome_cdp() -> str | None:
|
||||
"""Resolve the CHROME_CDP_URL env var (set by f1-stream's TF stack).
|
||||
|
||||
Migrated 2026-06-04 from CHROME_WS_URL/CHROME_WS_TOKEN. chrome-service
|
||||
now runs chromium directly with CDP exposed on :9222 so its persistent
|
||||
user-data-dir actually persists cookies (the old playwright launch-server
|
||||
pattern created ephemeral contexts per `connect()`). NetworkPolicy
|
||||
(labelled client namespaces only) is the only gate — no path token.
|
||||
"""
|
||||
return os.getenv("CHROME_CDP_URL")
|
||||
|
||||
|
||||
class ChromeBrowserExtractor(BaseExtractor):
|
||||
|
|
@ -106,10 +110,10 @@ class ChromeBrowserExtractor(BaseExtractor):
|
|||
return "Chrome Browser"
|
||||
|
||||
async def extract(self) -> list[ExtractedStream]:
|
||||
ws_url = _resolve_chrome_ws()
|
||||
if not ws_url:
|
||||
cdp_url = _resolve_chrome_cdp()
|
||||
if not cdp_url:
|
||||
logger.warning(
|
||||
"[chrome-browser] CHROME_WS_URL/TOKEN not set — extractor disabled"
|
||||
"[chrome-browser] CHROME_CDP_URL not set — extractor disabled"
|
||||
)
|
||||
return []
|
||||
|
||||
|
|
@ -123,9 +127,9 @@ class ChromeBrowserExtractor(BaseExtractor):
|
|||
# round. Contexts are cheap; the browser is shared.
|
||||
async with async_playwright() as p:
|
||||
try:
|
||||
browser = await p.chromium.connect(ws_url, timeout=15_000)
|
||||
browser = await p.chromium.connect_over_cdp(cdp_url, timeout=15_000)
|
||||
except Exception:
|
||||
logger.exception("[chrome-browser] connect to chrome-service failed")
|
||||
logger.exception("[chrome-browser] CDP connect to chrome-service failed")
|
||||
return []
|
||||
|
||||
results: list[ExtractedStream] = []
|
||||
|
|
|
|||
|
|
@ -336,14 +336,32 @@ class PlaybackVerifier:
|
|||
logger.error("playwright not installed — playback verification disabled")
|
||||
return None
|
||||
self._playwright = await async_playwright().start()
|
||||
ws_base = os.getenv("CHROME_WS_URL")
|
||||
ws_token = os.getenv("CHROME_WS_TOKEN")
|
||||
if ws_base and ws_token:
|
||||
self._browser = await self._playwright.chromium.connect(
|
||||
f"{ws_base.rstrip('/')}/{ws_token}", timeout=15_000,
|
||||
)
|
||||
logger.info("connected to remote chrome-service (concurrency=%d)", MAX_CONCURRENCY)
|
||||
else:
|
||||
# CHROME_CDP_URL points to chrome-service's CDP endpoint
|
||||
# (http://chrome-service.chrome-service.svc:9222 by default).
|
||||
# Migrated 2026-06-04 from `chromium.connect(ws_url)` because
|
||||
# chrome-service now runs chromium directly with persistent
|
||||
# user-data-dir for cookie warming — launch-server couldn't
|
||||
# persist. The CDP `Browser` exposes the persistent default
|
||||
# context via `browser.contexts[0]`; here we just call
|
||||
# `new_context()` for incognito-style isolation per verify
|
||||
# round, matching the previous behaviour.
|
||||
cdp_url = os.getenv("CHROME_CDP_URL")
|
||||
if cdp_url:
|
||||
try:
|
||||
self._browser = await self._playwright.chromium.connect_over_cdp(
|
||||
cdp_url, timeout=15_000,
|
||||
)
|
||||
logger.info("connected to remote chrome-service via CDP (concurrency=%d)", MAX_CONCURRENCY)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"CDP connect failed (%s) — falling back to in-process Chromium", cdp_url,
|
||||
)
|
||||
self._browser = None
|
||||
if self._browser is None:
|
||||
# Either CHROME_CDP_URL was unset, or CDP connect failed.
|
||||
# Fall back to in-process headless so the verifier still
|
||||
# returns playable/unplayable verdicts (degraded but
|
||||
# functional — anti-bot pages may bypass).
|
||||
self._browser = await self._playwright.chromium.launch(
|
||||
headless=True,
|
||||
args=[
|
||||
|
|
@ -355,7 +373,10 @@ class PlaybackVerifier:
|
|||
"--autoplay-policy=no-user-gesture-required",
|
||||
],
|
||||
)
|
||||
logger.warning("CHROME_WS_URL not set — using in-process Chromium (concurrency=%d)", MAX_CONCURRENCY)
|
||||
logger.warning(
|
||||
"using in-process Chromium (CHROME_CDP_URL unset or CDP connect failed) (concurrency=%d)",
|
||||
MAX_CONCURRENCY,
|
||||
)
|
||||
return self._browser
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
|
|
|
|||
|
|
@ -148,18 +148,14 @@ resource "kubernetes_deployment" "f1-stream" {
|
|||
}
|
||||
# Verifier connects to in-cluster headed Chromium pool — see
|
||||
# stacks/chrome-service/. Falls back to in-process headless if unset.
|
||||
# 2026-06-04: migrated WS (:3000 / path-token) → CDP (:9222 /
|
||||
# NetworkPolicy-gated). Token is no longer needed for the
|
||||
# connection itself; the chrome-service-client-secrets ExternalSecret
|
||||
# below stays in place because the snapshot endpoint (dev-box only,
|
||||
# not used by f1-stream) reuses the same Vault key.
|
||||
env {
|
||||
name = "CHROME_WS_URL"
|
||||
value = "ws://chrome-service.chrome-service.svc.cluster.local:3000"
|
||||
}
|
||||
env {
|
||||
name = "CHROME_WS_TOKEN"
|
||||
value_from {
|
||||
secret_key_ref {
|
||||
name = "chrome-service-client-secrets"
|
||||
key = "api_bearer_token"
|
||||
}
|
||||
}
|
||||
name = "CHROME_CDP_URL"
|
||||
value = "http://chrome-service.chrome-service.svc.cluster.local:9222"
|
||||
}
|
||||
# The embed proxy (this pod's /embed?url=…) must be reachable from
|
||||
# the remote chrome-service pod. Default 127.0.0.1 only works for
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue