stem95su: scheduled Drive->site sync CronJob (every 10m)

CronJob stem95su-gdrive-sync (*/10) mounts the content PVC RW and
rclone-syncs the read-only Drive folder "claude" (stem claude/files) onto
it (rclone/rclone:1.74.3, scope=drive.readonly, empty-source guard +
--max-delete 25). ESO ExternalSecret stem95su-rclone <- Vault
secret/stem95su. Requires the GCP OAuth app published to Production or the
refresh token expires ~weekly.

Lands the gdrive-sync stack on master (it had landed on a feature branch
by accident on the shared devvm checkout).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 08:42:26 +00:00
parent 05b50d2b96
commit 6d224861c4
1168 changed files with 120 additions and 358547 deletions

View file

@ -1,214 +0,0 @@
#!/usr/bin/env python3
"""CDP-aware proxy: 0.0.0.0:9222 → 127.0.0.1:9223 with Host header rewriting.
Why this exists:
Stock Chrome binaries silently ignore --remote-debugging-address (the flag is
gated by a build-time switch most distributions don't set), so CDP always
binds 127.0.0.1:<port>. Worse, Chrome enforces DNS rebinding protection on
the HTTP DevTools endpoint: any Host header that isn't `localhost`,
`127.0.0.1`, or `[::1]` returns 500 "Host header is specified and is not an
IP address or localhost". There is no `--remote-allow-hosts` flag in stock
Chrome 130 (verified by binary string search).
This means a raw TCP forwarder doesn't work — clients hitting the K8s
Service DNS get 500 because Chrome rejects the Host header.
What this script does:
- Listens on 0.0.0.0:9222 (the public CDP port the K8s Service exposes).
- For each TCP connection from a CDP client:
1. Read the HTTP request line + headers.
2. Rewrite `Host: <whatever>` to `Host: localhost:9222`, remembering
the original value (for response rewriting).
3. Open a connection to Chrome at 127.0.0.1:9223 and forward the
modified request line + headers + body.
4. Read Chrome's HTTP response. If it's 101 Switching Protocols
(WebSocket upgrade), forward it as-is and switch to raw byte piping
in both directions (CDP frames are binary, no further parsing).
5. Otherwise it's a regular HTTP/JSON response. Substitute
`localhost:9222` (the URL Chrome composed from the rewritten Host)
back to the client's original Host header value. Forward.
- The Microsoft playwright image ships python3 but not socat, hence this
stdlib-only helper.
Limitations:
- Only HTTP/1.x supported (CDP doesn't use HTTP/2).
- Body is assumed to fit in one read for non-WS responses (CDP JSON
responses are kilobytes, well within limits).
- No SSL/TLS — the cluster network is the trust boundary.
"""
import os
import socket
import sys
import threading
# Address and port the bridge listens on; overridable via the
# BRIDGE_LISTEN_ADDR / BRIDGE_LISTEN_PORT environment variables.
LISTEN_ADDR = os.environ.get("BRIDGE_LISTEN_ADDR", "0.0.0.0")
LISTEN_PORT = int(os.environ.get("BRIDGE_LISTEN_PORT", "9222"))
# Upstream endpoint requests are forwarded to; overridable via the
# BRIDGE_TARGET_ADDR / BRIDGE_TARGET_PORT environment variables.
TARGET_ADDR = os.environ.get("BRIDGE_TARGET_ADDR", "127.0.0.1")
TARGET_PORT = int(os.environ.get("BRIDGE_TARGET_PORT", "9223"))
# Host header value derived from the listen port ("localhost:<LISTEN_PORT>").
INTERNAL_HOST = f"localhost:{LISTEN_PORT}"
def recv_until(sock: socket.socket, marker: bytes, max_bytes: int = 65536) -> bytes:
    """Accumulate data from ``sock`` until ``marker`` appears.

    Reading also stops once at least ``max_bytes`` have been collected or the
    peer closes the connection. Data is pulled in 4096-byte reads, so the
    result may extend past the marker (and past ``max_bytes``) by up to one
    read. Returns everything received, marker included.
    """
    received = bytearray()
    while True:
        if marker in received or len(received) >= max_bytes:
            break
        piece = sock.recv(4096)
        if not piece:
            # Peer closed before the marker showed up.
            break
        received += piece
    return bytes(received)
def rewrite_host(headers: bytes, new_host: str) -> tuple[bytes, str | None]:
"""Replace the Host header. Returns (new_headers, original_host)."""
lines = headers.split(b"\r\n")
original = None
out = []
for line in lines:
if line.lower().startswith(b"host:"):
original = line.split(b":", 1)[1].strip().decode("latin-1")
out.append(f"Host: {new_host}".encode("latin-1"))
else:
out.append(line)
return b"\r\n".join(out), original
def pipe(src: socket.socket, dst: socket.socket) -> None:
    """Copy bytes from ``src`` to ``dst`` until EOF or a socket error.

    Used for the raw relay after a WebSocket upgrade. When the copy ends the
    read side of ``src`` and the write side of ``dst`` are shut down; socket
    errors during the copy or the shutdowns are deliberately ignored.
    """
    try:
        while chunk := src.recv(65536):
            dst.sendall(chunk)
    except OSError:
        pass
    finally:
        # Half-close each end; either socket may already be gone.
        for sock, how in ((src, socket.SHUT_RD), (dst, socket.SHUT_WR)):
            try:
                sock.shutdown(how)
            except OSError:
                pass
def handle(client: socket.socket) -> None:
    """Serve one client connection: a single HTTP exchange or a WebSocket session.

    The request head is read from ``client``, its Host header is replaced
    with ``INTERNAL_HOST`` and the request is forwarded to
    ``TARGET_ADDR:TARGET_PORT``. A 101 response is relayed untouched and the
    two sockets are then piped raw in both directions until both pipes end.
    Any other response has its body read up to Content-Length, has
    ``INTERNAL_HOST`` substituted back to the client's original Host value,
    and is returned with a recomputed Content-Length and ``Connection: close``.

    Exceptions are logged to stderr rather than raised; both sockets are
    always closed before returning.
    """
    upstream: socket.socket | None = None
    try:
        # Read until end-of-headers.
        head_buf = recv_until(client, b"\r\n\r\n")
        if b"\r\n\r\n" not in head_buf:
            return
        head, tail = head_buf.split(b"\r\n\r\n", 1)
        new_head, original_host = rewrite_host(head, INTERNAL_HOST)

        upstream = socket.create_connection((TARGET_ADDR, TARGET_PORT), timeout=5)
        # `create_connection(timeout=5)` sets the socket's timeout to 5s,
        # which then applies to all subsequent recv() calls too. After a WS
        # upgrade either side can stay silent for minutes — leave timeouts
        # off so the pipe doesn't blow up the connection on idle.
        upstream.settimeout(None)
        # NOTE: only the request bytes that arrived together with the headers
        # (`tail`) are forwarded; a request body delivered later is not read.
        upstream.sendall(new_head + b"\r\n\r\n" + tail)

        # Read response headers from upstream.
        resp_head_buf = recv_until(upstream, b"\r\n\r\n")
        if b"\r\n\r\n" not in resp_head_buf:
            return
        resp_head, resp_tail = resp_head_buf.split(b"\r\n\r\n", 1)
        first_line = resp_head.split(b"\r\n", 1)[0].decode("latin-1", errors="replace")

        # Match any 101 status (Chrome's CDP says "101 WebSocket Protocol
        # Handshake", not the canonical "101 Switching Protocols"). Sniff the
        # status code from the first line, e.g. "HTTP/1.1 101 ...".
        parts = first_line.split(" ", 2)
        status_code = parts[1] if len(parts) >= 2 else ""
        if status_code == "101":
            # WS upgrade. Forward as-is and start raw pipe.
            client.sendall(resp_head + b"\r\n\r\n" + resp_tail)
            t1 = threading.Thread(target=pipe, args=(client, upstream), daemon=True)
            t2 = threading.Thread(target=pipe, args=(upstream, client), daemon=True)
            t1.start()
            t2.start()
            t1.join()
            t2.join()
            return

        # Regular HTTP response. Determine body length (Content-Length only —
        # CDP doesn't use chunked encoding for /json/* endpoints) and rewrite.
        content_length = 0
        for line in resp_head.split(b"\r\n"):
            if line.lower().startswith(b"content-length:"):
                try:
                    # Clamp negatives to 0: a negative length would otherwise
                    # slice bytes off the END of the body further down.
                    content_length = max(0, int(line.split(b":", 1)[1].strip()))
                except ValueError:
                    pass
                break

        body = resp_tail
        while len(body) < content_length:
            chunk = upstream.recv(65536)
            if not chunk:
                break
            body += chunk
        # Truncate any extra bytes that came past content_length (shouldn't
        # happen with stock chrome but defensive against pipelined responses).
        if content_length and len(body) > content_length:
            body = body[:content_length]

        # Rewrite the URLs Chrome composed using its localhost Host so callers
        # can follow them back through this bridge.
        if original_host:
            # rewrite_host() decoded the Host value as latin-1, so encode it
            # with the same codec: the default UTF-8 would turn any byte
            # >= 0x80 into two bytes and corrupt the substituted value.
            body = body.replace(
                INTERNAL_HOST.encode("latin-1"), original_host.encode("latin-1")
            )

        # Rebuild response headers: drop any existing Content-Length / Connection
        # header and force `Connection: close` + the new Content-Length. This
        # keeps the bridge one-request-per-connection (no keep-alive); avoids a
        # whole class of upstream/downstream desync issues, especially because
        # Node's ws library will open a fresh TCP for the WS upgrade rather
        # than trying to reuse the HTTP probe's connection.
        new_lines = []
        for line in resp_head.split(b"\r\n"):
            lowered = line.lower()
            if lowered.startswith((b"content-length:", b"connection:")):
                continue
            new_lines.append(line)
        new_lines.append(f"Content-Length: {len(body)}".encode())
        new_lines.append(b"Connection: close")
        resp_head = b"\r\n".join(new_lines)

        client.sendall(resp_head + b"\r\n\r\n" + body)
    except Exception as e:
        # Per-connection boundary: log and drop the connection, keep serving.
        sys.stderr.write(f"[cdp-bridge] handle error: {e}\n")
    finally:
        try:
            client.close()
        except OSError:
            pass
        if upstream is not None:
            try:
                upstream.close()
            except OSError:
                pass
def main() -> int:
    """Bind the listening socket and hand each connection to ``handle``.

    Every accepted connection is served on its own daemon thread. The accept
    loop has no exit path, so the function only ends by exception or process
    termination; the ``int`` return annotation is kept for the entry-point
    guard below.
    """
    # The context manager closes the listener if bind/listen/accept raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listener.bind((LISTEN_ADDR, LISTEN_PORT))
        listener.listen(64)
        # The two f-string fragments are concatenated, so the separator must
        # be spelled out; without it the listen port and the target address
        # run together ("...:9222127.0.0.1:9223").
        sys.stderr.write(
            f"[cdp-bridge] HTTP-aware proxy listening on {LISTEN_ADDR}:{LISTEN_PORT}"
            f" → {TARGET_ADDR}:{TARGET_PORT} (rewriting Host → {INTERNAL_HOST})\n"
        )
        while True:
            client, _ = listener.accept()
            threading.Thread(target=handle, args=(client,), daemon=True).start()


if __name__ == "__main__":
    sys.exit(main() or 0)

View file

@ -1,19 +0,0 @@
# Image that runs /entrypoint.sh with x11vnc, noVNC and websockify installed.
FROM docker.io/library/ubuntu:24.04
# Install the VNC server, the noVNC web client and the websockify bridge.
# The apt package lists are removed in the same layer so they are not kept
# in the image.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
x11vnc \
novnc \
websockify \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# noVNC ships /usr/share/novnc/vnc.html; alias to index.html so / works.
RUN ln -sf /usr/share/novnc/vnc.html /usr/share/novnc/index.html
# Documents the container port; 6080 is presumably where the entrypoint
# serves noVNC — confirm against entrypoint.sh.
EXPOSE 6080
# Install the entrypoint script and make sure it is executable.
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
CMD ["/entrypoint.sh"]

View file

@ -1,39 +0,0 @@
#!/usr/bin/env bash
# Connect to the chrome-service container's Xvfb (shared pod network, TCP)
# and serve the noVNC HTML5 client + websockify bridge on :6080.
set -e

# port_open PORT — succeeds when something accepts TCP on 127.0.0.1:PORT.
# The probe is wrapped in a group so `2>/dev/null` is already in effect when
# bash opens /dev/tcp. Redirections are processed left to right, so a
# trailing `2>/dev/null` on the same command is applied only after the
# /dev/tcp redirection has failed and printed "Connection refused".
port_open() {
    { echo > "/dev/tcp/127.0.0.1/$1"; } 2>/dev/null
}

# Wait up to 15 attempts (2s apart) for Xvfb's TCP listener. This is
# best-effort: if it never comes up we still fall through, and the x11vnc
# checks below turn that into a hard failure.
for i in {1..15}; do
    if port_open 6099; then
        echo "Xvfb TCP up after attempt $i"
        break
    fi
    echo "waiting for Xvfb TCP 6099 attempt=$i"
    sleep 2
done

# websockify runs as PID 1; x11vnc is a child so its logs land on container stdout
# `-noshm` skips MIT-SHM probes that fail across container boundaries (each
# container has its own /dev/shm); `-noxdamage` skips XDAMAGE which Xvfb
# doesn't expose; `-quiet` keeps the polling chatter out of pod logs.
echo "starting x11vnc -> :5900"
x11vnc -display localhost:99 -nopw -listen 0.0.0.0 -rfbport 5900 \
    -forever -shared -noshm -noxdamage -quiet 2>&1 &
X11VNC_PID=$!

# Wait up to 10 attempts (2s apart) for x11vnc to bind its RFB port.
for i in {1..10}; do
    if port_open 5900; then
        echo "x11vnc bound 5900 after attempt $i"
        break
    fi
    # Fail fast when x11vnc has already died instead of polling a port that
    # can never open for the remaining attempts.
    if ! kill -0 "$X11VNC_PID" 2>/dev/null; then
        echo "ERROR: x11vnc exited before binding 5900"
        exit 1
    fi
    echo "waiting for x11vnc :5900 attempt=$i"
    sleep 2
done

if ! port_open 5900; then
    echo "ERROR: x11vnc did not bind 5900"
    exit 1
fi

echo "starting websockify -> :6080"
exec websockify --web=/usr/share/novnc 6080 localhost:5900

View file

@ -1,69 +0,0 @@
#!/usr/bin/env python3
"""Connect to chrome-service via CDP, dump storage state, write atomically.
Runs hourly as a Kubernetes CronJob. Mounts the chrome-service encrypted
PVC at /profile (same node via pod-affinity) and writes the snapshot to
/profile/snapshots/storage-state.json. The snapshot-server sidecar reads
from the same path and serves it bearer-gated.
CDP endpoint is plain HTTP — protection is the chrome-service
NetworkPolicy (allow only labelled client namespaces). Same security model
as the previous WS endpoint, just unauthenticated within the trust zone.
"""
import asyncio
import logging
import os
import pathlib
import sys
# Configure root logging at INFO with a "<time> <level> <message>" format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("snapshot-harvester")
# CDP endpoint to attach to; overridable via the CDP_URL environment variable.
CDP_URL = os.environ.get(
"CDP_URL", "http://chrome-service.chrome-service.svc.cluster.local:9222"
)
# Directory holding the snapshot; overridable via SNAPSHOT_DIR.
SNAPSHOT_DIR = pathlib.Path(os.environ.get("SNAPSHOT_DIR", "/profile/snapshots"))
# Final snapshot path, and the temporary sibling in the same directory.
SNAPSHOT_FILE = SNAPSHOT_DIR / "storage-state.json"
TMP_FILE = SNAPSHOT_DIR / "storage-state.json.tmp"
async def main() -> int:
    """Dump the first browser context's storage state to ``SNAPSHOT_FILE``.

    The state is written to ``TMP_FILE`` first and then moved over
    ``SNAPSHOT_FILE`` so readers never observe a partially written file.

    Returns:
        0 on success, 2 when Playwright cannot be imported, 3 when the CDP
        connection fails, 4 when the browser reports no contexts.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        log.error("playwright not installed in image")
        return 2

    SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True)

    async with async_playwright() as pw:
        try:
            browser = await pw.chromium.connect_over_cdp(CDP_URL, timeout=20_000)
        except Exception:
            log.exception("connect_over_cdp failed (%s)", CDP_URL)
            return 3

        try:
            open_contexts = browser.contexts
            if open_contexts:
                # storage_state writes cookies + localStorage to a JSON file.
                # IndexedDB and sessionStorage are NOT included (known
                # Playwright limitation).
                await open_contexts[0].storage_state(path=str(TMP_FILE))
                TMP_FILE.replace(SNAPSHOT_FILE)
                log.info(
                    "wrote snapshot (%d bytes) to %s",
                    SNAPSHOT_FILE.stat().st_size,
                    SNAPSHOT_FILE,
                )
                return 0
            log.error("no browser contexts found — chrome-service may not have launched a persistent context yet")
            return 4
        finally:
            # Best-effort close; a failure here must not mask the outcome.
            try:
                await browser.close()
            except Exception:
                pass


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))

View file

@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""Tiny HTTP server that exposes /api/snapshot, gated by a bearer token.
Runs as a sidecar in the chrome-service pod. Reads the persisted storage
state written hourly by the snapshot-harvester CronJob and returns it to
authenticated callers (the dev-box `playwright-snapshot-refresh` timer).
Token is read from the PW_TOKEN env var, same secret the legacy WS path
used. The endpoint is mounted behind Traefik on `chrome.viktorbarzin.me`
at the `/api/snapshot` path (auth=none at the ingress; the bearer check
is here).
"""
import os
import sys
from http.server import HTTPServer, BaseHTTPRequestHandler
# Bearer token read from PW_TOKEN; None when the variable is unset.
TOKEN = os.environ.get("PW_TOKEN")
# Path of the snapshot file; overridable via SNAPSHOT_PATH.
SNAPSHOT_PATH = os.environ.get(
"SNAPSHOT_PATH", "/profile/snapshots/storage-state.json"
)
# TCP port number; overridable via PORT.
PORT = int(os.environ.get("PORT", "8088"))
class Handler(BaseHTTPRequestHandler):
    """Serve ``/healthz`` and the bearer-gated ``/api/snapshot`` endpoint."""

    server_version = "chrome-snapshot/1"

    def _short(self, status: int, body: bytes = b"") -> None:
        """Send a minimal response: status, Content-Length and optional body."""
        self.send_response(status)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def _authorized(self) -> bool:
        """Return True when the Authorization header is ``Bearer <TOKEN>``."""
        # Function-scope import so the class does not depend on a change to
        # the file's import block.
        import hmac

        supplied = self.headers.get("Authorization", "")
        expected = f"Bearer {TOKEN}"
        # Constant-time comparison: a plain `!=` returns at the first
        # differing character, which leaks how much of the token matched.
        # Compare bytes because compare_digest rejects non-ASCII str.
        return hmac.compare_digest(
            supplied.encode("utf-8", "surrogateescape"),
            expected.encode("utf-8", "surrogateescape"),
        )

    def do_GET(self):
        """Route GET requests.

        ``/healthz`` answers 200 without authentication. ``/api/snapshot``
        answers 503 when no token is configured, 401 on a missing or wrong
        bearer token, 404 when the snapshot file does not exist yet, and
        otherwise 200 with the file contents. Every other path is 404.
        """
        if self.path == "/healthz":
            self._short(200, b"ok\n")
            return
        if self.path != "/api/snapshot":
            self._short(404)
            return
        # `not TOKEN` also rejects an empty PW_TOKEN; with only an
        # `is None` check an empty token would make "Bearer " a valid
        # credential.
        if not TOKEN:
            self._short(503, b"{\"error\":\"token not configured\"}\n")
            return
        if not self._authorized():
            self._short(401, b"{\"error\":\"invalid bearer\"}\n")
            return
        try:
            with open(SNAPSHOT_PATH, "rb") as f:
                data = f.read()
        except FileNotFoundError:
            self._short(404, b"{\"error\":\"snapshot not yet available\"}\n")
            return
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Cache-Control", "no-cache")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def log_message(self, fmt, *args):
        """Write access-log lines to stderr with a ``[snapshot-server]`` prefix."""
        sys.stderr.write(
            "[snapshot-server] %s - %s\n" % (self.address_string(), fmt % args)
        )


if __name__ == "__main__":
    HTTPServer(("0.0.0.0", PORT), Handler).serve_forever()

View file

@ -1,54 +0,0 @@
// Minimal stealth init script for Playwright-driven Chromium.
// Vendored from puppeteer-extra-plugin-stealth/evasions/* (MIT) — covers:
// webdriver, chrome.runtime, navigator.plugins, navigator.languages,
// Permissions.query, WebGL getParameter (vendor + renderer spoof).
// Run via context.add_init_script() so it executes before any page script.
(() => {
  // navigator.webdriver — most common detection, removed entirely.
  Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined });

  // window.chrome.runtime — many sites check that real Chrome exposes this.
  if (!window.chrome) window.chrome = {};
  window.chrome.runtime = window.chrome.runtime || {};

  // navigator.plugins — headless reports zero; spoof a plausible PDF viewer.
  Object.defineProperty(navigator, 'plugins', {
    get: () => [{ name: 'Chrome PDF Plugin' }, { name: 'Chrome PDF Viewer' }, { name: 'Native Client' }],
  });

  // navigator.languages — headless returns empty array.
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });

  // Permissions.query — headless returns 'denied' for notifications instead of 'default'.
  const permissions = window.navigator.permissions;
  const origQuery = permissions && permissions.query;
  if (origQuery) {
    permissions.query = (parameters) =>
      parameters && parameters.name === 'notifications'
        ? Promise.resolve({ state: Notification.permission })
        // query() is a native method that needs its Permissions receiver;
        // calling the detached reference directly throws
        // "TypeError: Illegal invocation" for every non-notification query.
        : origQuery.call(permissions, parameters);
  }

  // WebGL getParameter — spoof vendor + renderer strings to a real GPU.
  const spoofGl = (proto) => {
    if (!proto) return;
    const orig = proto.getParameter;
    proto.getParameter = function (parameter) {
      if (parameter === 37445) return 'Intel Inc.'; // UNMASKED_VENDOR_WEBGL
      if (parameter === 37446) return 'Intel Iris OpenGL Engine'; // UNMASKED_RENDERER_WEBGL
      return orig.apply(this, arguments);
    };
  };
  spoofGl(window.WebGLRenderingContext && window.WebGLRenderingContext.prototype);
  spoofGl(window.WebGL2RenderingContext && window.WebGL2RenderingContext.prototype);

  // disable-devtool.js (theajack/disable-devtool) auto-inits via a script
  // tag with `disable-devtool-auto`. Its Performance detector trips under
  // Playwright (CDP adds console.log latency vs console.table) and the
  // redirect URL is hard-coded — for hmembeds that's google.com.
  // Hide the auto-init marker so the library's IIFE exits early.
  const origQS = Document.prototype.querySelector;
  Document.prototype.querySelector = function (sel) {
    if (typeof sel === 'string' && sel.indexOf('disable-devtool-auto') !== -1) return null;
    return origQS.apply(this, arguments);
  };
})();