diff --git a/.claude/reference/service-catalog.md b/.claude/reference/service-catalog.md index 3d888e93..a3619212 100644 --- a/.claude/reference/service-catalog.md +++ b/.claude/reference/service-catalog.md @@ -45,7 +45,8 @@ | nextcloud | File sync/share | nextcloud | | calibre | E-book management (may be merged into ebooks stack) | calibre | | onlyoffice | Document editing | onlyoffice | -| f1-stream | F1 streaming | f1-stream | +| f1-stream | F1 streaming (uses chrome-service for hmembeds verifier) | f1-stream | +| chrome-service | Headed Chromium WebSocket pool (`ws://chrome-service.chrome-service.svc:3000/`) for sibling services driving anti-bot embeds | chrome-service | | rybbit | Analytics | rybbit | | isponsorblocktv | SponsorBlock for TV | isponsorblocktv | | actualbudget | Budgeting (factory pattern) | actualbudget | diff --git a/docs/architecture/chrome-service.md b/docs/architecture/chrome-service.md new file mode 100644 index 00000000..74e100cf --- /dev/null +++ b/docs/architecture/chrome-service.md @@ -0,0 +1,126 @@ +# chrome-service — In-cluster headed Chromium pool + +## Overview + +`chrome-service` is a single-replica, persistent-profile, bearer-token-gated +Playwright **launch-server** that exposes a headed Chromium browser over a +WebSocket. Sibling services connect to it instead of running their own +in-process Chromium when the upstream's anti-bot tooling +(`disable-devtool.js` redirect-to-google trap, console-clear timing tricks, +`navigator.webdriver` checks) defeats a headless browser. + +Initial caller: `f1-stream`'s `playback_verifier`. Future callers attach +via the WS+token contract documented in `stacks/chrome-service/README.md`. + +## Why a separate stack + +In-process Chromium inside `f1-stream`: + +- Runs **headless** by default (no `Xvfb`/`DISPLAY`). +- Has the `HeadlessChromium/...` UA suffix and `navigator.webdriver === true`. 
+- Trips `disable-devtool.js`'s **Performance** detector — Playwright's CDP + adds latency to `console.log(largeArray)` vs `console.table(largeArray)`, + which the lib reads as "DevTools is open" and redirects to + `https://www.google.com/`. + +`chrome-service` solves this by: + +1. Running **headed** under `Xvfb :99` (via `playwright launch-server` with + a JSON config that pins `headless: false`). +2. Living in a long-lived pod so JIT browser launch latency disappears. +3. Allowing a per-context init script + (`stacks/chrome-service/files/stealth.js` ~ 40 lines, vendored from + `puppeteer-extra-plugin-stealth`) to spoof `webdriver`, `chrome.runtime`, + `plugins`, `languages`, `Permissions.query`, WebGL renderer strings, and + to hide the `disable-devtool-auto` script-tag attribute so the lib's + IIFE exits early. + +## Wire protocol + +```text + ws://chrome-service.chrome-service.svc.cluster.local:3000/ + │ + ┌───────────────────────────────┼───────────────────────────────┐ + │ caller pod │ chrome-service pod + │ (e.g. f1-stream) │ (single replica) + │ │ + │ CHROME_WS_URL ──────────────┘ + │ CHROME_WS_TOKEN ─── from `secret/chrome-service.api_bearer_token` (ESO) + │ + │ await chromium.connect(f"{ws}/{token}") + │ await ctx.add_init_script(STEALTH_JS) + │ page.goto("https://upstream.com/embed/...") + │ + └─── ←── pages render under Xvfb, headed Chromium ──── ─────────┘ +``` + +## Image pin + +Both the server image (`mcr.microsoft.com/playwright:v1.48.0-noble` in +`stacks/chrome-service/main.tf`) and the Python client +(`playwright==1.48.0` in callers' `requirements.txt`) **must match +minor-versions**. Bump in lockstep — Playwright protocol changes between +minors and the client cannot connect to a mismatched server. 
+ +The Microsoft image ships only the browser binaries, not the `playwright` +npm SDK; the start command runs `npx -y playwright@1.48.0 launch-server` +which downloads the SDK on first start (cached under `$HOME/.npm` via the +PVC) and reuses it on subsequent restarts. + +## Storage + +- **`chrome-service-profile-encrypted`** (PVC, 2Gi → 10Gi autoresize, + `proxmox-lvm-encrypted`) — Chromium user-data dir + npm cache. + Encrypted because cookies/localStorage may include third-party auth tokens + for sites callers drive. `HOME=/profile` so npx caches there. +- **`chrome-service-backup-host`** (NFS, RWX) — destination for a 6-hourly + CronJob that `tar -czf /backup/.tar.gz -C /profile .`, + retention 30 days. + +## Auth + secrets + +- Vault KV `secret/chrome-service.api_bearer_token` — 32-byte URL-safe + random, rotated by hand: + `vault kv put secret/chrome-service api_bearer_token=$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')`. +- ESO syncs into namespace-local Secret `chrome-service-secrets` + (server pod) and `chrome-service-client-secrets` (each caller pod). +- Reloader (`reloader.stakater.com/auto = "true"`) cascades token rotation + to both server and any annotated caller — no manual rollout. + +## Network controls + +- **`kubernetes_network_policy_v1.ws_ingress`** — only namespaces labelled + `chrome-service.viktorbarzin.me/client = "true"` (plus an explicit + fallback for `f1-stream` by `kubernetes.io/metadata.name`) can reach + TCP/3000. +- **WS port 3000** is internal-only (no ingress, no Cloudflare DNS). +- **HTTP port 80** (sidecar `nginxinc/nginx-unprivileged:alpine`) serves + a static health stub at `chrome.viktorbarzin.me`, Authentik-gated. + Lets a human confirm pod liveness without spinning a browser. + +## Adding a new caller + +See `stacks/chrome-service/README.md` for the four-step recipe: + +1. Label the caller's namespace. +2. Add an `ExternalSecret` pulling `secret/chrome-service`. +3. 
Inject `CHROME_WS_URL` + `CHROME_WS_TOKEN` env vars. +4. Vendor `stealth.js` and apply via `await context.add_init_script(...)` + after every `new_context()`. + +## Limits + risks + +- **Anti-bot vs stealth arms race** — when an upstream beats us (DRM + license check, device-fingerprint mismatch, hotlink protection that + whitelists specific parent domains), the verifier returns + `is_playable=False` and the extractor moves on. No user-visible + breakage, just empty stream lists for that source. +- **JWPlayer DRM error 102630** — observed with several hmembeds embeds + even from the headed chrome-service. The license check bails because + the request origin isn't on the embed's allowlist; this is upstream + policy, not an infra defect. +- **Single replica + RWO PVC** — the deployment uses `Recreate` strategy. + Brief outage on rollout, ~30s for browser warmup. +- **No `/metrics` endpoint** — the cluster's generic + `KubePodCrashLooping` rule covers basic alerting. A Prometheus scrape + exporter is day-2 work. diff --git a/stacks/chrome-service/README.md b/stacks/chrome-service/README.md new file mode 100644 index 00000000..b18e9116 --- /dev/null +++ b/stacks/chrome-service/README.md @@ -0,0 +1,90 @@ +# chrome-service + +In-cluster headed Chromium exposed over Playwright's WebSocket protocol. +Sibling services drive it instead of running their own in-process browser +— useful when the upstream tries to detect headless mode (e.g. hmembeds' +`disable-devtool.js` redirect-to-google trap). + +## Connect + +```python +from playwright.async_api import async_playwright + +WS_URL = "ws://chrome-service.chrome-service.svc.cluster.local:3000" +WS_TOKEN = os.environ["CHROME_WS_TOKEN"] # 32-byte URL-safe random + +async with async_playwright() as p: + browser = await p.chromium.connect(f"{WS_URL}/{WS_TOKEN}", timeout=15_000) + context = await browser.new_context() + await context.add_init_script(STEALTH_JS) # see files/stealth.js + page = await context.new_page() + ... 
+ await browser.close() +``` + +The token comes from Vault KV `secret/chrome-service.api_bearer_token`, +which ESO syncs into a per-namespace K8s Secret in each caller stack +(see f1-stream's `chrome-service-client-secrets`). + +## Add a new caller + +1. **Label the caller's namespace** so the chrome-service NetworkPolicy + admits it: + ```hcl + resource "kubernetes_namespace" "" { + metadata { + labels = { + "chrome-service.viktorbarzin.me/client" = "true" + } + } + } + ``` +2. **Add an ExternalSecret** in the caller stack pulling the token: + ```hcl + resource "kubernetes_manifest" "chrome_token" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { name = "chrome-service-client-secrets", namespace = "" } + spec = { + refreshInterval = "15m" + secretStoreRef = { name = "vault-kv", kind = "ClusterSecretStore" } + target = { name = "chrome-service-client-secrets" } + dataFrom = [{ extract = { key = "chrome-service" } }] + } + } + } + ``` +3. **Inject `CHROME_WS_URL` + `CHROME_WS_TOKEN`** into the caller's pod env. + Use `secret_key_ref` for the token; the URL is a plain value. +4. **Vendor `stealth.js`** into the caller (or just paste — it's ~40 lines) + and apply via `await context.add_init_script(STEALTH_JS)` after every + `new_context()`. Without it, hmembeds-class anti-bot still trips. + +## Image pin + +Both the server image (`mcr.microsoft.com/playwright:v1.48.0-noble` in +`main.tf`) and the client (`playwright==1.48.0` in callers' requirements) +must match minor-versions. Bump in lockstep — Playwright protocol changes +between minors. + +## Operations + +- **Storage**: encrypted PVC at `/profile` for cookies + npm cache. Ephemeral + contexts (`browser.new_context()`) bypass the profile; persistent contexts + share it. Backed up tar+gzip every 6h to `/srv/nfs/chrome-service-backup/`, + 30-day retention. +- **Probes**: TCP/3000. 
Playwright run-server has no HTTP `/health`; a TCP + open is the only liveness signal available without spinning a browser. +- **Health page**: visit `https://chrome.viktorbarzin.me` (Authentik-gated) + to confirm the pod is up. The WS port stays internal-only. +- **Token rotation**: `vault kv put secret/chrome-service api_bearer_token=$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')`. + Reloader cascades the rotation to both the server pod and any caller + whose secret has the `reloader.stakater.com/auto = "true"` annotation. + +## Why headed (Xvfb) instead of headless? + +`disable-devtool.js` and similar libraries detect `navigator.webdriver`, +console-clear timing, and the `HeadlessChromium/...` user-agent suffix. +Running headed inside `Xvfb :99` reports as a normal Chromium, and the +stealth init script handles the JS-visible giveaways. diff --git a/stacks/chrome-service/files/stealth.js b/stacks/chrome-service/files/stealth.js new file mode 100644 index 00000000..dfae98a8 --- /dev/null +++ b/stacks/chrome-service/files/stealth.js @@ -0,0 +1,54 @@ +// Minimal stealth init script for Playwright-driven Chromium. +// Vendored from puppeteer-extra-plugin-stealth/evasions/* (MIT) — covers: +// webdriver, chrome.runtime, navigator.plugins, navigator.languages, +// Permissions.query, WebGL getParameter (vendor + renderer spoof). +// Run via context.add_init_script() so it executes before any page script. +(() => { + // navigator.webdriver — most common detection, removed entirely. + Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined }); + + // window.chrome.runtime — many sites check that real Chrome exposes this. + if (!window.chrome) window.chrome = {}; + window.chrome.runtime = window.chrome.runtime || {}; + + // navigator.plugins — headless reports zero; spoof a plausible PDF viewer. 
+ Object.defineProperty(navigator, 'plugins', { + get: () => [{ name: 'Chrome PDF Plugin' }, { name: 'Chrome PDF Viewer' }, { name: 'Native Client' }], + }); + + // navigator.languages — headless returns empty array. + Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); + + // Permissions.query — headless returns 'denied' for notifications instead of 'default'. + const origQuery = window.navigator.permissions && window.navigator.permissions.query; + if (origQuery) { + window.navigator.permissions.query = (parameters) => + parameters && parameters.name === 'notifications' + ? Promise.resolve({ state: Notification.permission }) + : origQuery(parameters); + } + + // WebGL getParameter — spoof vendor + renderer strings to a real GPU. + const spoofGl = (proto) => { + if (!proto) return; + const orig = proto.getParameter; + proto.getParameter = function (parameter) { + if (parameter === 37445) return 'Intel Inc.'; // UNMASKED_VENDOR_WEBGL + if (parameter === 37446) return 'Intel Iris OpenGL Engine'; // UNMASKED_RENDERER_WEBGL + return orig.apply(this, arguments); + }; + }; + spoofGl(window.WebGLRenderingContext && window.WebGLRenderingContext.prototype); + spoofGl(window.WebGL2RenderingContext && window.WebGL2RenderingContext.prototype); + + // disable-devtool.js (theajack/disable-devtool) auto-inits via a script + // tag with `disable-devtool-auto`. Its Performance detector trips under + // Playwright (CDP adds console.log latency vs console.table) and the + // redirect URL is hard-coded — for hmembeds that's google.com. + // Hide the auto-init marker so the library's IIFE exits early. 
+ const origQS = Document.prototype.querySelector; + Document.prototype.querySelector = function (sel) { + if (typeof sel === 'string' && sel.indexOf('disable-devtool-auto') !== -1) return null; + return origQS.apply(this, arguments); + }; +})(); diff --git a/stacks/chrome-service/main.tf b/stacks/chrome-service/main.tf new file mode 100644 index 00000000..0b2d8b68 --- /dev/null +++ b/stacks/chrome-service/main.tf @@ -0,0 +1,502 @@ +variable "tls_secret_name" { + type = string + sensitive = true +} +variable "nfs_server" { type = string } + +locals { + namespace = "chrome-service" + labels = { + app = "chrome-service" + } + # Pin to the same Playwright minor that the Python client requires. + # If you bump this image, also bump `playwright==X.Y.Z` in the client + # (currently f1-stream) and re-run the connect smoke test. + image = "mcr.microsoft.com/playwright:v1.48.0-noble" +} + +# --- Namespace --- + +resource "kubernetes_namespace" "chrome_service" { + metadata { + name = local.namespace + labels = { + "istio-injection" = "disabled" + tier = local.tiers.aux + "chrome-service.viktorbarzin.me/server" = "true" + } + } + lifecycle { + # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace + ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]] + } +} + +# --- Secrets (single-key extract: api_bearer_token) --- + +resource "kubernetes_manifest" "external_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "chrome-service-secrets" + namespace = local.namespace + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-kv" + kind = "ClusterSecretStore" + } + target = { + name = "chrome-service-secrets" + } + dataFrom = [{ + extract = { + key = "chrome-service" + } + }] + } + } + depends_on = [kubernetes_namespace.chrome_service] +} + +# tls-secret for the chrome.viktorbarzin.me ingress is auto-cloned into +# 
every namespace by Kyverno's `sync-tls-secret` ClusterPolicy — no local +# module call needed. + +# --- Encrypted profile PVC --- +# Holds Chromium user data: cookies, localStorage, IndexedDB. Sites we +# drive may set auth tokens or session cookies — encrypted is correct. +resource "kubernetes_persistent_volume_claim" "profile_encrypted" { + wait_until_bound = false + metadata { + name = "chrome-service-profile-encrypted" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "80%" + "resize.topolvm.io/increase" = "100%" + "resize.topolvm.io/storage_limit" = "10Gi" + } + } + spec { + access_modes = ["ReadWriteOnce"] + storage_class_name = "proxmox-lvm-encrypted" + resources { + requests = { + storage = "2Gi" + } + } + } +} + +# --- NFS backup target --- +module "nfs_chrome_service_backup_host" { + source = "../../modules/kubernetes/nfs_volume" + name = "chrome-service-backup-host" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + nfs_server = "192.168.1.127" + nfs_path = "/srv/nfs/chrome-service-backup" +} + +# --- Deployment --- + +resource "kubernetes_deployment" "chrome_service" { + metadata { + name = "chrome-service" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + labels = merge(local.labels, { + tier = local.tiers.aux + }) + annotations = { + "reloader.stakater.com/auto" = "true" + } + } + spec { + replicas = 1 + strategy { + type = "Recreate" + } + selector { + match_labels = local.labels + } + template { + metadata { + labels = local.labels + } + spec { + security_context { + run_as_user = 1000 + run_as_group = 1000 + fs_group = 1000 + seccomp_profile { + type = "RuntimeDefault" + } + } + + # Fix profile dir ownership (PVC may have root-owned files from prior run). 
+ init_container { + name = "fix-perms" + image = "busybox:1.37" + command = ["sh", "-c", "chown -R 1000:1000 /profile"] + security_context { + run_as_user = 0 + } + volume_mount { + name = "profile" + mount_path = "/profile" + } + resources { + requests = { memory = "32Mi" } + limits = { memory = "64Mi" } + } + } + + container { + name = "chrome-service" + image = local.image + image_pull_policy = "IfNotPresent" + + # `launch-server` (not `run-server`) lets us pin headed mode + + # specific args. `run-server` defaults to headless, which the + # disable-devtool.js Performance detector trips under Playwright + # (CDP adds latency to console.log; lib detects + redirects). + # The Microsoft image ships only the browsers, not the playwright + # npm package itself — `npx -y playwright@` downloads it on + # first start (cached under $HOME/.npm via the PVC) and pins to + # the same minor as the Python client. Bump in lockstep. + command = ["bash", "-c"] + args = [ + <<-EOT + set -e + Xvfb :99 -screen 0 1280x720x24 & + sleep 1 + cat > /tmp/launch.json <chrome-service +

chrome-service

+

Headed-Chromium-as-a-service is running.

+

Connect via Playwright: chromium.connect("ws://chrome-service.chrome-service.svc.cluster.local:3000/<TOKEN>")

+ EOT + } +} + +# --- Services --- +# WS endpoint (internal only, gated by NetworkPolicy + token). +resource "kubernetes_service" "chrome_service" { + metadata { + name = "chrome-service" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + labels = local.labels + } + + spec { + selector = local.labels + port { + name = "ws" + port = 3000 + target_port = 3000 + protocol = "TCP" + } + } +} + +# Health page (Authentik-gated, exposed via ingress). +resource "kubernetes_service" "chrome_health" { + metadata { + name = "chrome" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + labels = local.labels + } + + spec { + selector = local.labels + port { + name = "http" + port = 80 + target_port = 8080 + protocol = "TCP" + } + } +} + +module "ingress" { + source = "../../modules/kubernetes/ingress_factory" + dns_type = "proxied" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + name = "chrome" + tls_secret_name = var.tls_secret_name + protected = true + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Chrome Service" + "gethomepage.dev/description" = "Headed Chromium WebSocket pool" + "gethomepage.dev/icon" = "chromium.png" + "gethomepage.dev/group" = "Infrastructure" + } +} + +# --- NetworkPolicy: TCP/3000 ingress only from labelled client namespaces. +# The cluster has no default-deny, so this NP only takes effect inside +# chrome-service ns — pods elsewhere remain unaffected. Callers opt in by +# labelling their namespace `chrome-service.viktorbarzin.me/client = "true"`. 
+resource "kubernetes_network_policy_v1" "ws_ingress" { + metadata { + name = "chrome-service-ws-ingress" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + } + spec { + pod_selector { + match_labels = local.labels + } + policy_types = ["Ingress"] + ingress { + from { + namespace_selector { + match_labels = { + "chrome-service.viktorbarzin.me/client" = "true" + } + } + } + # Explicit fallback list — admit f1-stream by name in case the label + # is removed by accident. Keep this in sync with the labels above. + from { + namespace_selector { + match_labels = { + "kubernetes.io/metadata.name" = "f1-stream" + } + } + } + ports { + port = "3000" + protocol = "TCP" + } + } + } +} + +# --- Backup CronJob: tar+gzip the profile every 6h, 30-day retention. --- +resource "kubernetes_cron_job_v1" "chrome_service_backup" { + metadata { + name = "chrome-service-backup" + namespace = kubernetes_namespace.chrome_service.metadata[0].name + } + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 + successful_jobs_history_limit = 1 + schedule = "47 */6 * * *" + starting_deadline_seconds = 60 + job_template { + metadata {} + spec { + backoff_limit = 2 + ttl_seconds_after_finished = 300 + template { + metadata {} + spec { + # PVC is RWO — colocate the backup pod with the chrome-service + # pod so both can mount the volume on the same node. + affinity { + pod_affinity { + required_during_scheduling_ignored_during_execution { + label_selector { + match_labels = local.labels + } + topology_key = "kubernetes.io/hostname" + } + } + } + container { + name = "backup" + image = "docker.io/library/alpine:3.20" + command = ["/bin/sh", "-c", <<-EOT + set -euxo pipefail + ts=$(date +"%Y_%m_%d_%H") + tar -czf /backup/$${ts}.tar.gz -C /profile . 
+ find /backup -maxdepth 1 -type f -name '*.tar.gz' -mtime +30 -delete + echo "Backup complete: $${ts}.tar.gz" + EOT + ] + volume_mount { + name = "profile" + mount_path = "/profile" + read_only = true + } + volume_mount { + name = "backup" + mount_path = "/backup" + } + resources { + requests = { cpu = "10m", memory = "32Mi" } + limits = { memory = "64Mi" } + } + } + volume { + name = "profile" + persistent_volume_claim { + claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name + } + } + volume { + name = "backup" + persistent_volume_claim { + claim_name = module.nfs_chrome_service_backup_host.claim_name + } + } + restart_policy = "OnFailure" + } + } + } + } + } + lifecycle { + # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2 + ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] + } +} diff --git a/stacks/chrome-service/terragrunt.hcl b/stacks/chrome-service/terragrunt.hcl new file mode 100644 index 00000000..0d1c8e53 --- /dev/null +++ b/stacks/chrome-service/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "platform" { + config_path = "../platform" + skip_outputs = true +} diff --git a/stacks/f1-stream/files/backend/playback_verifier.py b/stacks/f1-stream/files/backend/playback_verifier.py index b870173e..bb239f70 100644 --- a/stacks/f1-stream/files/backend/playback_verifier.py +++ b/stacks/f1-stream/files/backend/playback_verifier.py @@ -336,18 +336,26 @@ class PlaybackVerifier: logger.error("playwright not installed — playback verification disabled") return None self._playwright = await async_playwright().start() - self._browser = await self._playwright.chromium.launch( - headless=True, - args=[ - "--disable-dev-shm-usage", - "--disable-web-security", - "--no-sandbox", - "--disable-setuid-sandbox", - "--disable-features=IsolateOrigins,site-per-process", - "--autoplay-policy=no-user-gesture-required", - ], - ) - 
logger.info("Playwright browser launched (concurrency=%d)", MAX_CONCURRENCY) + ws_base = os.getenv("CHROME_WS_URL") + ws_token = os.getenv("CHROME_WS_TOKEN") + if ws_base and ws_token: + self._browser = await self._playwright.chromium.connect( + f"{ws_base.rstrip('/')}/{ws_token}", timeout=15_000, + ) + logger.info("connected to remote chrome-service (concurrency=%d)", MAX_CONCURRENCY) + else: + self._browser = await self._playwright.chromium.launch( + headless=True, + args=[ + "--disable-dev-shm-usage", + "--disable-web-security", + "--no-sandbox", + "--disable-setuid-sandbox", + "--disable-features=IsolateOrigins,site-per-process", + "--autoplay-policy=no-user-gesture-required", + ], + ) + logger.warning("CHROME_WS_URL not set — using in-process Chromium (concurrency=%d)", MAX_CONCURRENCY) return self._browser async def shutdown(self) -> None: @@ -387,6 +395,8 @@ class PlaybackVerifier: viewport={"width": 1280, "height": 720}, bypass_csp=True, ) + from backend.stealth import STEALTH_JS + await context.add_init_script(STEALTH_JS) page = await context.new_page() except Exception as e: return PlaybackVerdict( diff --git a/stacks/f1-stream/files/backend/stealth.py b/stacks/f1-stream/files/backend/stealth.py new file mode 100644 index 00000000..a5dfb5b7 --- /dev/null +++ b/stacks/f1-stream/files/backend/stealth.py @@ -0,0 +1,43 @@ +"""Vendored Playwright stealth init script. + +Mirror of `stacks/chrome-service/files/stealth.js`. Kept in sync by hand +— update both files together if the JS is changed. 
+""" + +STEALTH_JS = r""" +(() => { + Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined }); + if (!window.chrome) window.chrome = {}; + window.chrome.runtime = window.chrome.runtime || {}; + Object.defineProperty(navigator, 'plugins', { + get: () => [{ name: 'Chrome PDF Plugin' }, { name: 'Chrome PDF Viewer' }, { name: 'Native Client' }], + }); + Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); + const origQuery = window.navigator.permissions && window.navigator.permissions.query; + if (origQuery) { + window.navigator.permissions.query = (parameters) => + parameters && parameters.name === 'notifications' + ? Promise.resolve({ state: Notification.permission }) + : origQuery(parameters); + } + const spoofGl = (proto) => { + if (!proto) return; + const orig = proto.getParameter; + proto.getParameter = function (parameter) { + if (parameter === 37445) return 'Intel Inc.'; + if (parameter === 37446) return 'Intel Iris OpenGL Engine'; + return orig.apply(this, arguments); + }; + }; + spoofGl(window.WebGLRenderingContext && window.WebGLRenderingContext.prototype); + spoofGl(window.WebGL2RenderingContext && window.WebGL2RenderingContext.prototype); + // disable-devtool.js auto-init evasion: hide the marker attribute so the + // library's IIFE exits early. Without this, hmembeds-class players redirect + // to google.com when the Performance detector trips under Playwright. 
+ const origQS = Document.prototype.querySelector; + Document.prototype.querySelector = function (sel) { + if (typeof sel === 'string' && sel.indexOf('disable-devtool-auto') !== -1) return null; + return origQS.apply(this, arguments); + }; +})(); +""" diff --git a/stacks/f1-stream/main.tf b/stacks/f1-stream/main.tf index 13f463a6..cfa1cd60 100644 --- a/stacks/f1-stream/main.tf +++ b/stacks/f1-stream/main.tf @@ -11,7 +11,8 @@ resource "kubernetes_namespace" "f1-stream" { name = "f1-stream" labels = { "istio-injection" : "disabled" - tier = local.tiers.aux + tier = local.tiers.aux + "chrome-service.viktorbarzin.me/client" = "true" } } lifecycle { @@ -47,6 +48,35 @@ resource "kubernetes_manifest" "external_secret" { depends_on = [kubernetes_namespace.f1-stream] } +# Pull the chrome-service bearer token into this namespace as a separate +# Secret so the verifier can reach the in-cluster Playwright pool. +resource "kubernetes_manifest" "chrome_service_client_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "chrome-service-client-secrets" + namespace = "f1-stream" + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-kv" + kind = "ClusterSecretStore" + } + target = { + name = "chrome-service-client-secrets" + } + dataFrom = [{ + extract = { + key = "chrome-service" + } + }] + } + } + depends_on = [kubernetes_namespace.f1-stream] +} + resource "kubernetes_persistent_volume_claim" "data_proxmox" { wait_until_bound = false metadata { @@ -127,6 +157,29 @@ resource "kubernetes_deployment" "f1-stream" { name = "DISCORD_CHANNELS" value = var.discord_f1_channel_ids } + # Verifier connects to in-cluster headed Chromium pool — see + # stacks/chrome-service/. Falls back to in-process headless if unset. 
+ env { + name = "CHROME_WS_URL" + value = "ws://chrome-service.chrome-service.svc.cluster.local:3000" + } + env { + name = "CHROME_WS_TOKEN" + value_from { + secret_key_ref { + name = "chrome-service-client-secrets" + key = "api_bearer_token" + } + } + } + # The embed proxy (this pod's /embed?url=…) must be reachable from + # the remote chrome-service pod. Default 127.0.0.1 only works for + # in-process Chromium — for the remote browser we point it at our + # own ClusterIP service. + env { + name = "PLAYBACK_VERIFY_PROXY_BASE" + value = "http://f1.f1-stream.svc.cluster.local" + } volume_mount { name = "data" mount_path = "/data"