infra/stacks/chrome-service/main.tf
Viktor Barzin deede6dd11 chrome-service: switch to CDP + persistent profile + hourly snapshot pipeline
The chrome-service stack ran `playwright launch-server`, which creates
ephemeral browser contexts per `connect()`. Despite the encrypted PVC
mounted at /profile, no chromium user-data ever persisted — only npm
cache + fontconfig. Logging in via noVNC was effectively a no-op.

Refactor:
- Replace launch-server with direct chromium (TCP CDP on :9223 internal),
  fronted by a Python HTTP+WS bridge on :9222 that rewrites the Host
  header to bypass Chrome's hardcoded DNS-rebinding protection (no
  `--remote-allow-hosts` flag exists in stock Chrome 130; verified by
  binary string grep). Bridge also forces Connection: close on HTTP
  responses so Node ws opens a fresh TCP for the WS upgrade rather than
  trying to reuse the dead keep-alive socket.
- Add `--user-data-dir=/profile/chromium-data` so cookies/localStorage
  actually persist on the encrypted PVC.
- New snapshot-server sidecar (stdlib python HTTP) serves
  GET /api/snapshot at chrome.viktorbarzin.me/api/snapshot,
  bearer-token-gated by the existing api_bearer_token.
- New chrome-service-snapshot-harvester CronJob (hourly) connects via
  CDP, dumps storage_state() (cookies + localStorage), writes atomically
  to /profile/snapshots/storage-state.json.
- NetworkPolicy: TCP/9222 (was :3000), TCP/8088 added for traefik.

Caller migration:
- f1-stream: `chromium.connect(ws_url)` → `chromium.connect_over_cdp(cdp_url)`,
  env var CHROME_WS_URL → CHROME_CDP_URL. CHROME_WS_TOKEN dropped (no
  longer used by code; ExternalSecret kept for symmetry with the snapshot
  endpoint).

Dev-box side (out of scope for this commit — see ~/.config/systemd/user/):
- playwright-mcp.service flips to `--isolated --storage-state=...`
  so per-Claude-Code-session ephemeral contexts seed from the snapshot.
- playwright-snapshot-refresh.{service,timer} (hourly) pulls the
  snapshot via the bearer-gated HTTPS endpoint.

Docs updated:
- docs/architecture/chrome-service.md — new architecture diagram + wire protocol.
- docs/runbooks/chrome-service-snapshot.md — day-2 ops (refresh, rotation,
  failure modes, restore).
- stacks/chrome-service/README.md — connect_over_cdp recipe.

Design spec at docs/superpowers/specs/2026-06-04-playwright-per-session-browser-design.md.
2026-06-05 09:19:10 +00:00

833 lines
28 KiB
HCL

# Name of the TLS secret handed to both ingress_factory module calls in this
# file. Flagged sensitive, so Terraform redacts the value in plan/apply output.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# NFS server address supplied by the caller.
# NOTE(review): nothing in this file as shown reads var.nfs_server — the
# backup NFS module call hardcodes its own address. Confirm whether this
# input is still required (e.g. by shared/generated stack plumbing).
variable "nfs_server" {
  type = string
}
locals {
  # Namespace name, used by the Namespace resource and the ExternalSecret.
  namespace = "chrome-service"

  # Label set shared by the Deployment (and its pod template/selector), the
  # Services, the scripts ConfigMap and the NetworkPolicy pod selector.
  labels = {
    app = "chrome-service"
  }

  # Directory on the profile PVC where the harvester writes, and the
  # snapshot-server reads, storage-state.json.
  snapshot_dir = "/profile/snapshots"

  # Browser image. It must stay on the same Playwright minor as the Python
  # clients: when bumping it, also bump `playwright==X.Y.Z` in the callers'
  # requirements (currently f1-stream and the snapshot-harvester) and re-run
  # the connect smoke test. Chromium lives under /ms-playwright/ in this image.
  image = "mcr.microsoft.com/playwright:v1.48.0-noble"

  # Python flavour of the same image, used by the snapshot-harvester CronJob
  # and by the snapshot-server sidecar (a small stdlib-only HTTP server).
  python_image = "mcr.microsoft.com/playwright/python:v1.48.0-noble"
}
# --- Namespace ---
# Namespace that holds every object of this stack.
# NOTE(review): local.tiers is not defined in this file — presumably it is
# provided by a sibling or generated file in the same module.
resource "kubernetes_namespace" "chrome_service" {
  metadata {
    name = local.namespace

    labels = {
      tier                                    = local.tiers.aux
      "istio-injection"                       = "disabled"
      "keel.sh/enrolled"                      = "true"
      "chrome-service.viktorbarzin.me/server" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode ClusterPolicy stamps
    # this label on every namespace, so Terraform must not try to remove it.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# --- Secrets (Vault entry "chrome-service" extracted as a whole; this file consumes only api_bearer_token) ---
# ExternalSecret that materialises the Kubernetes Secret
# "chrome-service-secrets" from the "vault-kv" ClusterSecretStore. The
# `dataFrom.extract` form pulls the remote entry "chrome-service" as a whole;
# within this file only its `api_bearer_token` key is consumed (by the
# snapshot-server sidecar).
resource "kubernetes_manifest" "external_secret" {
  # The namespace must exist before the manifest can be applied into it.
  depends_on = [kubernetes_namespace.chrome_service]

  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = local.namespace
      name      = "chrome-service-secrets"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "chrome-service-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "chrome-service"
          }
        },
      ]
    }
  }
}
# tls-secret for the chrome.viktorbarzin.me ingress is auto-cloned into
# every namespace by Kyverno's `sync-tls-secret` ClusterPolicy — no local
# module call needed.
# --- Encrypted profile PVC ---
# Holds Chromium user data: cookies, localStorage, IndexedDB. Sites we
# drive may set auth tokens or session cookies — encrypted is correct.
# Claim for the encrypted volume mounted at /profile by the Deployment and by
# both CronJobs.
resource "kubernetes_persistent_volume_claim" "profile_encrypted" {
  # Do not make Terraform wait for the claim to reach Bound.
  wait_until_bound = false

  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "chrome-service-profile-encrypted"

    # Auto-resize settings read by the PVC autoresizer; growth is capped at
    # storage_limit.
    # NOTE(review): presumably threshold = free-space trigger and increase =
    # growth step — confirm against the autoresizer's documentation.
    annotations = {
      "resize.topolvm.io/storage_limit" = "10Gi"
      "resize.topolvm.io/increase"      = "100%"
      "resize.topolvm.io/threshold"     = "10%"
    }
  }

  spec {
    storage_class_name = "proxmox-lvm-encrypted"
    access_modes       = ["ReadWriteOnce"]

    resources {
      requests = {
        storage = "2Gi"
      }
    }
  }

  lifecycle {
    # The autoresizer raises requests.storage (up to storage_limit) and a PVC
    # can never shrink. If Terraform kept enforcing the 2Gi written above,
    # every apply would attempt a shrink, Kubernetes would reject it, and the
    # claim would end up stuck Terminating while still in use.
    ignore_changes = [spec[0].resources[0].requests]
  }
}
# --- NFS backup target ---
# NFS-backed volume that receives the profile tarballs written by the backup
# CronJob (which mounts this module's `claim_name` at /backup).
# NOTE(review): the server address is hardcoded here although this file also
# declares var.nfs_server — confirm whether the two are meant to differ.
module "nfs_chrome_service_backup_host" {
  source = "../../modules/kubernetes/nfs_volume"

  namespace  = kubernetes_namespace.chrome_service.metadata[0].name
  name       = "chrome-service-backup-host"
  nfs_path   = "/srv/nfs/chrome-service-backup"
  nfs_server = "192.168.1.127"
}
# --- Deployment ---
# Single-replica Deployment with three containers sharing one pod:
#   - chrome-service  : Xvfb + headed Chromium with a persistent profile; CDP
#                       is published on :9222 through cdp_bridge.py.
#   - novnc           : HTML5 view of the X display on :6080.
#   - snapshot-server : serves the harvested storage-state.json on :8088.
# chrome-service and snapshot-server mount the encrypted profile PVC.
resource "kubernetes_deployment" "chrome_service" {
  metadata {
    name      = "chrome-service"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    labels = merge(local.labels, {
      tier = local.tiers.aux
      # Deliberate pin: chrome-service's playwright image MUST match
      # the playwright Python version in f1-stream (see the comment on
      # local.image). Opt out of Keel auto-update via this label —
      # the inject-keel-annotations ClusterPolicy excludes workloads
      # selector-matching keel.sh/policy=never.
      "keel.sh/policy" = "never"
    })
    annotations = {
      "reloader.stakater.com/auto" = "true"
    }
  }
  spec {
    replicas = 1
    # Recreate stops the old pod before starting the new one (presumably
    # because the profile PVC is ReadWriteOnce and holds a single live
    # Chromium profile).
    strategy {
      type = "Recreate"
    }
    selector {
      match_labels = local.labels
    }
    template {
      metadata {
        labels = local.labels
      }
      spec {
        # The noVNC sidecar image is pulled from a private registry
        # (forgejo.viktorbarzin.me) which needs auth. Kyverno's
        # `sync-registry-credentials` ClusterPolicy syncs the secret into
        # every namespace.
        image_pull_secrets {
          name = "registry-credentials"
        }
        security_context {
          run_as_user  = 1000
          run_as_group = 1000
          fs_group     = 1000
          seccomp_profile {
            type = "RuntimeDefault"
          }
        }
        # Fix profile dir ownership (PVC may have root-owned files from prior run).
        init_container {
          name    = "fix-perms"
          image   = "busybox:1.37"
          command = ["sh", "-c", "chown -R 1000:1000 /profile"]
          security_context {
            run_as_user = 0
          }
          volume_mount {
            name       = "profile"
            mount_path = "/profile"
          }
          resources {
            requests = { memory = "32Mi" }
            limits   = { memory = "64Mi" }
          }
        }
        container {
          name              = "chrome-service"
          image             = local.image
          image_pull_policy = "IfNotPresent"
          # Direct chromium launch (NOT `playwright launch-server`). Reason:
          # launch-server creates ephemeral browser contexts per `connect()`
          # call, so cookies/localStorage never persist to the PVC — the
          # `/profile` mount only ever held npm cache + fontconfig.
          # Replaced 2026-06-04 with a CDP+persistent-profile model so the
          # warm browser (where Viktor logs in via noVNC) keeps cookies, and
          # the hourly snapshot-harvester CronJob can dump them via the
          # CDP endpoint. Callers migrate `chromium.connect()` ->
          # `chromium.connect_over_cdp()` (see f1-stream's playback_verifier).
          #
          # Flags that matter for connectivity/persistence:
          #   --remote-debugging-port=9223 : TCP CDP (vs default pipe) on the
          #                                  internal port. No
          #                                  --remote-debugging-address is
          #                                  passed: stock builds ignore it and
          #                                  bind loopback only, which is why
          #                                  cdp_bridge.py republishes CDP on
          #                                  0.0.0.0:9222 (details in the
          #                                  script comments below).
          #   --remote-allow-origins=*     : Chrome 111+ requires this for
          #                                  non-loopback CDP origins.
          #   --user-data-dir=/profile/chromium-data : persistent profile on
          #                                  the encrypted PVC.
          command = ["bash", "-c"]
          args = [
            <<-EOT
            set -e
            # Locate chromium in the Microsoft image. The path is
            # /ms-playwright/chromium-XXXX/chrome-linux/chrome where XXXX
            # is the playwright-pinned build; resolve at runtime so a minor
            # bump of the image doesn't break the launch line.
            CHROMIUM=$(find /ms-playwright -maxdepth 4 -name 'chrome' -type f -executable -path '*/chrome-linux/*' 2>/dev/null | head -1)
            if [ -z "$CHROMIUM" ]; then
            echo "ERROR: chromium binary not found under /ms-playwright" >&2
            exit 1
            fi
            echo "[chrome-service] using chromium: $CHROMIUM"
            # -listen tcp enables localhost:6099 so the noVNC sidecar can
            # attach over the pod's shared network ns (Ubuntu 24.04
            # defaults Xvfb to -nolisten tcp). -ac disables X access
            # control; safe because Xvfb only listens on the pod's lo.
            Xvfb :99 -screen 0 1280x720x24 -listen tcp -ac &
            sleep 1
            mkdir -p /profile/chromium-data ${local.snapshot_dir}
            # Why a bridge?
            # Stock Chrome binaries silently ignore --remote-debugging-address
            # (the flag is gated by a build-time switch most distributions don't
            # set), so CDP always binds 127.0.0.1:<port> regardless of what we
            # pass. The K8s liveness/readiness probe + cluster callers reach
            # the pod via its pod-IP, never localhost.
            # Fix: chromium listens on 127.0.0.1:9223 (hidden internal port),
            # cdp_bridge.py listens on 0.0.0.0:9222 (the public CDP port) and
            # transparently forwards. K8s Service, probes, NetworkPolicy all
            # stay on 9222 — no caller-side changes needed.
            # (Microsoft playwright image ships python3 but not socat, so the
            # bridge is a tiny stdlib script — see files/cdp_bridge.py.)
            python3 /scripts/cdp_bridge.py &
            # No EXIT trap for the bridge: the `exec` below replaces this
            # shell, so a trap registered here would never run. Chromium
            # becomes the container's main process; when it exits the
            # container stops and the backgrounded bridge goes with it.
            exec "$CHROMIUM" \
            --remote-debugging-port=9223 \
            --remote-allow-origins=* \
            --user-data-dir=/profile/chromium-data \
            --no-sandbox \
            --no-first-run \
            --no-default-browser-check \
            --disable-blink-features=AutomationControlled \
            --disable-features=IsolateOrigins,site-per-process \
            --autoplay-policy=no-user-gesture-required \
            --disable-dev-shm-usage \
            --password-store=basic \
            --use-mock-keychain \
            about:blank
            EOT
          ]
          env {
            name  = "DISPLAY"
            value = ":99"
          }
          env {
            name  = "HOME"
            value = "/profile"
          }
          port {
            name           = "cdp"
            container_port = 9222
            protocol       = "TCP"
          }
          # All three probes are TCP-open checks on 9222, which is the
          # cdp_bridge.py listener (Chromium itself is on 127.0.0.1:9223).
          # NOTE(review): a successful connect therefore shows the bridge is
          # accepting connections, not that Chromium has bound 9223 yet —
          # consider an HTTP probe of /json/version through the bridge if a
          # stronger readiness signal is needed.
          liveness_probe {
            tcp_socket { port = 9222 }
            initial_delay_seconds = 30
            period_seconds        = 30
            failure_threshold     = 3
          }
          readiness_probe {
            tcp_socket { port = 9222 }
            initial_delay_seconds = 10
            period_seconds        = 10
          }
          startup_probe {
            tcp_socket { port = 9222 }
            period_seconds    = 5
            failure_threshold = 24 # up to 2 minutes
          }
          volume_mount {
            name       = "profile"
            mount_path = "/profile"
          }
          volume_mount {
            name       = "dshm"
            mount_path = "/dev/shm"
          }
          # /scripts/cdp_bridge.py provides the 0.0.0.0:9222 -> 127.0.0.1:9223
          # forwarder (see entrypoint comment above for why).
          volume_mount {
            name       = "scripts"
            mount_path = "/scripts"
            read_only  = true
          }
          resources {
            requests = {
              cpu    = "200m"
              memory = "1500Mi"
            }
            limits = {
              memory = "2Gi"
            }
          }
        }
        # noVNC sidecar — exposes a live HTML5 view of the headed Chromium
        # session via x11vnc + websockify, gated by the Authentik-protected
        # ingress at chrome.viktorbarzin.me. CDP port 9222 (the new
        # Playwright endpoint) stays internal-only.
        container {
          name = "novnc"
          # Phase 3 cutover 2026-05-07 — Forgejo registry consolidation.
          image             = "forgejo.viktorbarzin.me/viktor/chrome-service-novnc:v4"
          image_pull_policy = "IfNotPresent"
          port {
            name           = "http"
            container_port = 6080
            protocol       = "TCP"
          }
          # x11vnc connects to the chrome-service container's Xvfb over
          # localhost TCP (shared pod network). Same uid 1000 as chrome
          # container so we can read MIT-MAGIC-COOKIE if Xvfb adds one.
          resources {
            requests = { cpu = "10m", memory = "32Mi" }
            limits   = { memory = "96Mi" }
          }
        }
        # snapshot-server sidecar — serves the hourly storage-state.json
        # snapshot (written by the snapshot-harvester CronJob to the same
        # PVC) over an HTTP endpoint, bearer-gated by PW_TOKEN. Mounted
        # behind Traefik at chrome.viktorbarzin.me/api/snapshot with
        # auth=none; the bearer check inside this server is the gate.
        # Source: files/snapshot_server.py — 60 lines, stdlib only.
        container {
          name              = "snapshot-server"
          image             = local.python_image
          image_pull_policy = "IfNotPresent"
          command           = ["python3", "/scripts/snapshot_server.py"]
          env {
            name = "PW_TOKEN"
            value_from {
              secret_key_ref {
                name = "chrome-service-secrets"
                key  = "api_bearer_token"
              }
            }
          }
          env {
            name  = "SNAPSHOT_PATH"
            value = "${local.snapshot_dir}/storage-state.json"
          }
          env {
            name  = "PORT"
            value = "8088"
          }
          port {
            name           = "snap"
            container_port = 8088
            protocol       = "TCP"
          }
          liveness_probe {
            http_get {
              path = "/healthz"
              port = 8088
            }
            initial_delay_seconds = 5
            period_seconds        = 30
          }
          readiness_probe {
            http_get {
              path = "/healthz"
              port = 8088
            }
            initial_delay_seconds = 2
            period_seconds        = 10
          }
          # Read-only: this sidecar only serves the snapshot file.
          volume_mount {
            name       = "profile"
            mount_path = "/profile"
            read_only  = true
          }
          volume_mount {
            name       = "scripts"
            mount_path = "/scripts"
            read_only  = true
          }
          resources {
            requests = { cpu = "5m", memory = "32Mi" }
            limits   = { memory = "96Mi" }
          }
        }
        volume {
          name = "profile"
          persistent_volume_claim {
            claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name
          }
        }
        # RAM-backed /dev/shm for the browser container.
        volume {
          name = "dshm"
          empty_dir {
            medium     = "Memory"
            size_limit = "256Mi"
          }
        }
        volume {
          name = "scripts"
          config_map {
            name         = kubernetes_config_map_v1.snapshot_scripts.metadata[0].name
            default_mode = "0555"
          }
        }
      }
    }
  }
  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      # Container/init-container `image` fields are deliberately NOT ignored.
      # This Deployment opts out of Keel (label keel.sh/policy=never above),
      # so nothing but Terraform changes its image tags. Ignoring them made a
      # bump of local.image a silent no-op for the browser container while
      # the snapshot-server and harvester still followed local.python_image —
      # exactly the client/server version skew the pin is meant to prevent.
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# --- ConfigMap: sidecar + harvester scripts ---
# Python scripts shipped as a ConfigMap and mounted at /scripts by the
# Deployment and the snapshot-harvester CronJob. Each entry is read verbatim
# from this module's files/ directory.
resource "kubernetes_config_map_v1" "snapshot_scripts" {
  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "snapshot-scripts"
    labels    = local.labels
  }

  data = {
    # Bridge run inside the chrome-service container to publish CDP:
    # 0.0.0.0:9222 -> 127.0.0.1:9223 (Chromium silently ignores
    # --remote-debugging-address on stock builds; see cdp_bridge.py).
    "cdp_bridge.py" = file("${path.module}/files/cdp_bridge.py")

    # Entry point of the snapshot-harvester CronJob.
    "snapshot_harvester.py" = file("${path.module}/files/snapshot_harvester.py")

    # Entry point of the snapshot-server sidecar.
    "snapshot_server.py" = file("${path.module}/files/snapshot_server.py")
  }
}
# --- Services ---
# CDP endpoint (internal only, gated by NetworkPolicy). 2026-06-04: switched
# from Playwright WS (:3000) to direct chromium CDP (:9222) so the persistent
# user-data-dir actually persists cookies; callers use `connect_over_cdp()`.
# Service in front of the CDP port (9222 -> 9222) of the chrome-service pods.
resource "kubernetes_service" "chrome_service" {
  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "chrome-service"
    labels    = local.labels
  }

  spec {
    selector = local.labels

    port {
      name        = "cdp"
      protocol    = "TCP"
      port        = 9222
      target_port = 9222
    }
  }
}
# noVNC view (Authentik-gated, exposed via ingress).
# Service "chrome": maps port 80 to the noVNC sidecar's container port 6080.
resource "kubernetes_service" "chrome_novnc" {
  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "chrome"
    labels    = local.labels
  }

  spec {
    selector = local.labels

    port {
      name        = "http"
      protocol    = "TCP"
      port        = 80
      target_port = 6080
    }
  }
}
# Snapshot-server endpoint (bearer-gated, exposed via the ingress sub-path
# chrome.viktorbarzin.me/api/snapshot; auth=none at the ingress layer
# because the bearer check happens inside snapshot_server.py).
# Service in front of the snapshot-server sidecar (8088 -> 8088); it is the
# backend of the /api/snapshot ingress.
resource "kubernetes_service" "chrome_snapshot" {
  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "chrome-snapshot"
    labels    = local.labels
  }

  spec {
    selector = local.labels

    port {
      name        = "snap"
      protocol    = "TCP"
      port        = 8088
      target_port = 8088
    }
  }
}
# Primary ingress for the noVNC view, with auth = "required".
# NOTE(review): no service_name/port is passed, so the backend is whatever
# ingress_factory derives by default — presumably the Service named "chrome";
# confirm against the module.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "chrome"
  namespace       = kubernetes_namespace.chrome_service.metadata[0].name
  tls_secret_name = var.tls_secret_name
  dns_type        = "proxied"
  auth            = "required"

  # noVNC's default page is /vnc.html; serving "/" lets it auto-redirect there.
  ingress_path = ["/"]

  # Homepage dashboard tile.
  extra_annotations = {
    "gethomepage.dev/enabled"     = "true"
    "gethomepage.dev/group"       = "Infrastructure"
    "gethomepage.dev/name"        = "Chrome Service"
    "gethomepage.dev/description" = "Live noVNC view of headed Chromium"
    "gethomepage.dev/icon"        = "chromium.png"
  }
}
# Second ingress on the same host (chrome.viktorbarzin.me) carving out
# /api/snapshot to the snapshot-server sidecar. Path-level carve-out
# pattern — see CLAUDE.md: "For path-level carve-outs (e.g. wrongmove has
# `/` behind Anubis but `/api` direct), declare a second ingress_factory
# with `ingress_path = ["/<path>"]` pointing at the bare backend service."
# Ingress carve-out that routes /api/snapshot on the "chrome" host to the
# chrome-snapshot Service on port 8088.
module "ingress_snapshot" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "chrome-snapshot"
  namespace       = kubernetes_namespace.chrome_service.metadata[0].name
  host            = "chrome"
  ingress_path    = ["/api/snapshot"]
  service_name    = kubernetes_service.chrome_snapshot.metadata[0].name
  port            = 8088
  tls_secret_name = var.tls_secret_name

  # No forward-auth on this path: the bearer token checked inside
  # snapshot_server.py is the gate. Authentik forward-auth would demand an
  # OIDC cookie, which the dev-box refresh timer cannot replay.
  auth = "none"

  # The DNS record for this host is already created by module.ingress.
  dns_type = "none"

  # Keep this endpoint off the homepage dashboard.
  extra_annotations = {
    "gethomepage.dev/enabled" = "false"
  }
}
# --- NetworkPolicy: scoped ingress.
# - TCP/9222 (Chromium CDP): only from labelled client namespaces, plus the
#   f1-stream and chrome-service namespaces admitted by name.
# - TCP/6080 (noVNC HTTP+WS): only from the traefik namespace (public path
#   is chrome.viktorbarzin.me -> Traefik -> sidecar; Authentik forward-auth
#   gates external access at the Traefik layer).
# - TCP/8088 (snapshot-server): only from the traefik namespace
#   (chrome.viktorbarzin.me/api/snapshot -> Traefik -> sidecar; bearer token
#   is the gate inside snapshot_server.py).
# The cluster has no default-deny, so this NP only takes effect inside the
# chrome-service namespace — pods elsewhere remain unaffected.
# Ingress-only NetworkPolicy for pods carrying local.labels. It has two
# rules: CDP (9222) from client namespaces, and noVNC (6080) plus
# snapshot-server (8088) from the traefik namespace.
resource "kubernetes_network_policy_v1" "ws_ingress" {
  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "chrome-service-ws-ingress"
  }

  spec {
    policy_types = ["Ingress"]

    pod_selector {
      match_labels = local.labels
    }

    # Rule 1 — CDP on TCP/9222. The three `from` blocks are alternatives;
    # traffic matching any one of them is admitted.
    ingress {
      # (a) Any namespace that carries the client label.
      from {
        namespace_selector {
          match_labels = {
            "chrome-service.viktorbarzin.me/client" = "true"
          }
        }
      }

      # (b) Explicit fallback: f1-stream is admitted by name in case its
      # client label is removed by accident. Keep in sync with (a).
      from {
        namespace_selector {
          match_labels = {
            "kubernetes.io/metadata.name" = "f1-stream"
          }
        }
      }

      # (c) This stack's own namespace: the snapshot-harvester CronJob runs
      # here and has to reach the CDP endpoint.
      from {
        namespace_selector {
          match_labels = {
            "kubernetes.io/metadata.name" = "chrome-service"
          }
        }
      }

      ports {
        protocol = "TCP"
        port     = "9222"
      }
    }

    # Rule 2 — noVNC (6080) and snapshot-server (8088), traefik namespace only.
    ingress {
      from {
        namespace_selector {
          match_labels = {
            "kubernetes.io/metadata.name" = "traefik"
          }
        }
      }

      ports {
        protocol = "TCP"
        port     = "6080"
      }

      ports {
        protocol = "TCP"
        port     = "8088"
      }
    }
  }
}
# --- Backup CronJob: tar+gzip the profile every 6h, 30-day retention. ---
# Backup CronJob: at minute 47 of every 6th hour, tar+gzip /profile onto the
# NFS backup volume and prune archives older than 30 days.
resource "kubernetes_cron_job_v1" "chrome_service_backup" {
  metadata {
    name      = "chrome-service-backup"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
  }
  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 3
    successful_jobs_history_limit = 1
    schedule                      = "47 */6 * * *"
    starting_deadline_seconds     = 60
    job_template {
      metadata {}
      spec {
        backoff_limit              = 2
        ttl_seconds_after_finished = 300
        template {
          metadata {}
          spec {
            # PVC is RWO — colocate the backup pod with the chrome-service
            # pod so both can mount the volume on the same node.
            affinity {
              pod_affinity {
                required_during_scheduling_ignored_during_execution {
                  label_selector {
                    match_labels = local.labels
                  }
                  topology_key = "kubernetes.io/hostname"
                }
              }
            }
            container {
              name  = "backup"
              image = "docker.io/library/alpine:3.20"
              # The archive is written to a hidden ".partial" file and only
              # renamed to its final *.tar.gz name once tar has succeeded.
              # Writing straight to the final name meant an interrupted run
              # (tar error under `set -e`, job replaced, OOM kill) left a
              # truncated archive that looked like a valid backup and was
              # retained for 30 days. `$$` is the Terraform escape for a
              # literal `$` before `{`.
              command = ["/bin/sh", "-c", <<-EOT
                set -euxo pipefail
                ts=$(date +"%Y_%m_%d_%H")
                final=/backup/$${ts}.tar.gz
                partial=/backup/.$${ts}.tar.gz.partial
                # Remove the partial file on any exit; a no-op after the rename.
                trap 'rm -f "$partial"' EXIT
                tar -czf "$partial" -C /profile .
                # Same directory, so the rename publishes the archive in one step.
                mv "$partial" "$final"
                find /backup -maxdepth 1 -type f -name '*.tar.gz' -mtime +30 -delete
                # Sweep partials orphaned by a hard kill (the trap cannot run then).
                find /backup -maxdepth 1 -type f -name '.*.tar.gz.partial' -mtime +1 -delete
                echo "Backup complete: $${ts}.tar.gz"
              EOT
              ]
              # The profile is only read by this job.
              volume_mount {
                name       = "profile"
                mount_path = "/profile"
                read_only  = true
              }
              volume_mount {
                name       = "backup"
                mount_path = "/backup"
              }
              resources {
                requests = { cpu = "10m", memory = "32Mi" }
                limits   = { memory = "64Mi" }
              }
            }
            volume {
              name = "profile"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name
              }
            }
            volume {
              name = "backup"
              persistent_volume_claim {
                claim_name = module.nfs_chrome_service_backup_host.claim_name
              }
            }
            restart_policy = "OnFailure"
          }
        }
      }
    }
  }
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# --- Snapshot harvester CronJob: hourly storage_state() dump via CDP ---
# Connects to the live chrome-service CDP endpoint, accesses the
# persistent default browser context (where Viktor's noVNC logins live),
# and writes cookies + localStorage to /profile/snapshots/storage-state.json
# (atomic rename). The snapshot-server sidecar reads from the same file.
# Hourly CronJob that runs /scripts/snapshot_harvester.py against the CDP
# Service, with the profile PVC mounted read-write so the script can write
# under local.snapshot_dir.
resource "kubernetes_cron_job_v1" "chrome_service_snapshot_harvester" {
  metadata {
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    name      = "chrome-service-snapshot-harvester"
  }

  spec {
    # Minute 23 of every hour — deliberately offset from the backup CronJob
    # (minute 47, every 6h) so the two never contend for the encrypted PVC
    # in the same minute.
    schedule                      = "23 * * * *"
    starting_deadline_seconds     = 60
    concurrency_policy            = "Replace"
    successful_jobs_history_limit = 1
    failed_jobs_history_limit     = 3

    job_template {
      metadata {}

      spec {
        ttl_seconds_after_finished = 300
        backoff_limit              = 2

        template {
          metadata {}

          spec {
            restart_policy = "OnFailure"

            # The PVC is RWO, so the job pod must land on the node that runs
            # the chrome-service pod.
            affinity {
              pod_affinity {
                required_during_scheduling_ignored_during_execution {
                  topology_key = "kubernetes.io/hostname"

                  label_selector {
                    match_labels = local.labels
                  }
                }
              }
            }

            container {
              name              = "harvester"
              image             = local.python_image
              image_pull_policy = "IfNotPresent"

              # Entrypoint: pip-install playwright==1.48.0 only if
              # `import playwright` fails, then exec the harvester script.
              # The pip cache is kept on the PVC (/profile/.cache/pip) so a
              # repeat install does not need to re-download.
              # (Original author's note: the Microsoft playwright/python
              # image ships browsers + Python but not the `playwright` pip
              # package — TODO confirm for the pinned tag.)
              command = ["bash", "-c"]
              args = [
                <<-EOT
                set -e
                export PIP_CACHE_DIR=/profile/.cache/pip
                export PIP_DISABLE_PIP_VERSION_CHECK=1
                python3 -c 'import playwright' 2>/dev/null \
                || pip install --quiet --no-warn-script-location playwright==1.48.0
                exec python3 /scripts/snapshot_harvester.py
                EOT
              ]

              # In-cluster address of the chrome-service CDP Service.
              env {
                name  = "CDP_URL"
                value = "http://chrome-service.chrome-service.svc.cluster.local:9222"
              }

              # Directory the script is told to write its snapshot into.
              env {
                name  = "SNAPSHOT_DIR"
                value = local.snapshot_dir
              }

              # connect_over_cdp drives the remote browser, so no local
              # browser download is wanted.
              env {
                name  = "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD"
                value = "1"
              }

              volume_mount {
                name       = "profile"
                mount_path = "/profile"
              }

              volume_mount {
                name       = "scripts"
                mount_path = "/scripts"
                read_only  = true
              }

              resources {
                requests = { cpu = "20m", memory = "128Mi" }
                limits   = { memory = "512Mi" }
              }
            }

            volume {
              name = "profile"

              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name
              }
            }

            volume {
              name = "scripts"

              config_map {
                name         = kubernetes_config_map_v1.snapshot_scripts.metadata[0].name
                default_mode = "0555"
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}