Merge remote-tracking branch 'origin/master' into wizard/reconcile-mirror
All checks were successful
ci/woodpecker/push/postmortem-todos Pipeline was successful
ci/woodpecker/push/default Pipeline was successful

# Conflicts:
#	scripts/t3-provision-users.sh
This commit is contained in:
Viktor Barzin 2026-06-16 22:32:43 +00:00
commit 8a2a3d9eca
13 changed files with 383 additions and 159 deletions

View file

@ -0,0 +1,35 @@
[Unit]
# playwright-mcp@<user>.service — one isolated playwright-mcp HTTP server per OS
# user. This is the browser MCP that the user's Claude Code sessions reach via
# the user-scope `.claude.json` entry "playwright"
# (http://localhost:<PLAYWRIGHT_PORT>/mcp).
#
# It is a system-level TEMPLATE unit: a single file committed to git,
# instantiated once per user, and manageable by root WITHOUT systemd --user or
# linger. setup-devvm.sh installs it into /etc/systemd/system and
# t3-provision-users.sh enables the per-user instances. It replaces the
# hand-made ~/.config/systemd/user units.
Description=Per-user isolated playwright-mcp HTTP server (%i)
# Pull in, and order after, the network and this user's snapshot refresh.
# Wants= (not Requires=) so a failed refresh does not block the server.
Wants=network-online.target
Wants=playwright-snapshot-refresh@%i.service
After=network-online.target
After=playwright-snapshot-refresh@%i.service

[Service]
User=%i
Type=simple
# Provides PLAYWRIGHT_PORT. t3-provision-users.sh writes this file per user from
# roster_engine (PLAYWRIGHT_BASE_PORT, sticky allocation). The path deliberately
# has no `-` prefix, so it is required: a missing port file must fail loudly
# instead of launching npx with an empty --port.
EnvironmentFile=/etc/t3-serve/playwright-%i.env
# Flags:
#   --isolated       every MCP HTTP connection (= every Claude Code session) gets
#                    its own fresh ephemeral BrowserContext, so one user's
#                    concurrent sessions never share tabs.
#   --storage-state  seeds each context from the hourly cookie snapshot harvested
#                    from in-cluster chrome-service (warm logged-in state).
# The package version is PINNED (see the T3_PIN rationale in setup-devvm.sh):
# @latest would be re-resolved on every restart, so a breaking upstream release
# could silently roll the whole fleet. Bump it deliberately in git.
# The home path is written out as /home/%i rather than %h, because %h resolves
# to /root in a system unit even when User= is set.
ExecStart=/usr/bin/npx -y @playwright/mcp@0.0.76 --port ${PLAYWRIGHT_PORT} --host localhost --headless --browser chrome --isolated --storage-state /home/%i/.cache/playwright-shared-storage-state.json
RestartSec=5
Restart=on-failure
StandardError=journal
StandardOutput=journal

[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,57 @@
#!/usr/bin/env bash
# Refresh the local cookie+localStorage snapshot served by chrome-service.
#
# Run per-user by the hourly playwright-snapshot-refresh@<user>.timer systemd
# unit (as that user, so $HOME resolves to the user's home). Per-session Claude
# Code MCP contexts (`@playwright/mcp --isolated --storage-state=…`) read this
# file on each connection — fresh state is visible to NEW sessions, existing
# ones keep what they were seeded with.
#
# Token: cached at ~/.config/playwright/token. Seeded per-user (if-absent) by
# t3-provision-users.sh from the root-staged /etc/t3-serve/chrome-service-token
# (which setup-devvm.sh writes from Vault `secret/chrome-service`
# api_bearer_token). Rotate by re-staging + re-copying; the snapshot endpoint
# reloads the token via Reloader, local caches must be refreshed.
#
# Environment overrides (each falls back to the default assigned below):
#   PLAYWRIGHT_SNAPSHOT_URL    snapshot endpoint to GET
#   PLAYWRIGHT_SNAPSHOT_TOKEN  path of the file holding the bearer token
#   PLAYWRIGHT_SNAPSHOT_PATH   destination storage-state JSON file
#
# Exit status: 0 on success; non-zero if the token file is unreadable, the
# request fails or returns a non-200 status, or the body is not a storageState
# JSON document. On any failure the existing snapshot is left untouched.
set -euo pipefail

URL="${PLAYWRIGHT_SNAPSHOT_URL:-https://chrome.viktorbarzin.me/api/snapshot}"
TOKEN_FILE="${PLAYWRIGHT_SNAPSHOT_TOKEN:-$HOME/.config/playwright/token}"
DEST="${PLAYWRIGHT_SNAPSHOT_PATH:-$HOME/.cache/playwright-shared-storage-state.json}"

if [ ! -r "$TOKEN_FILE" ]; then
    echo "ERROR: token file $TOKEN_FILE missing or unreadable" >&2
    exit 1
fi

mkdir -p "$(dirname "$DEST")"

# The snapshot holds session cookies. Restrict the umask BEFORE curl creates the
# temp file so neither it nor the renamed destination is ever readable by other
# local users (a chmod after the mv would leave a window). Set after mkdir so a
# newly created parent directory keeps the default mode.
umask 077

# Download next to the destination so the final mv is a same-filesystem rename.
TMP="$DEST.new.$$"
trap 'rm -f "$TMP"' EXIT

TOKEN="$(cat "$TOKEN_FILE")"

# The Authorization header is fed through stdin (`-H @-`, curl >= 7.55.0) rather
# than argv, so the bearer token does not show up in the process list.
HTTP_CODE=$(curl -sS \
    -H @- \
    -o "$TMP" \
    -w '%{http_code}' \
    --max-time 30 \
    "$URL" <<<"Authorization: Bearer $TOKEN")

if [ "$HTTP_CODE" != "200" ]; then
    echo "ERROR: HTTP $HTTP_CODE from $URL" >&2
    # Only dump the body if one was written; a bare `cat` on a missing file
    # would abort under `set -e` before reaching the exit below.
    if [ -s "$TMP" ]; then
        cat "$TMP" >&2
    fi
    exit 1
fi

# Sanity: response must be valid JSON with at least the cookies/origins keys.
python3 - "$TMP" <<'PY' || { echo "ERROR: response is not a valid storageState JSON" >&2; exit 1; }
import json, sys
with open(sys.argv[1]) as f:
    data = json.load(f)
if "cookies" not in data or "origins" not in data:
    raise SystemExit("missing required keys")
PY

# Publish with a rename, then drop the cleanup trap: $TMP no longer exists.
mv -f "$TMP" "$DEST"
trap - EXIT
# Already 0600 via the umask; kept so the mode is enforced explicitly.
chmod 600 "$DEST"
echo "snapshot refreshed: $DEST ($(stat -c %s "$DEST") bytes)"

View file

@ -0,0 +1,22 @@
[Unit]
# playwright-snapshot-refresh@<user>.service — per-user oneshot that downloads
# the warm cookie+localStorage snapshot from in-cluster chrome-service into
# ~/.cache/playwright-shared-storage-state.json. playwright-mcp@%i seeds every
# new session from that file. System-level TEMPLATE unit (one instance per
# user) that runs the shared /usr/local/bin script as that user.
Description=Refresh %i's playwright storage-state snapshot from chrome-service
Wants=network-online.target
After=network-online.target

[Service]
# User= is what makes the script's $HOME-relative paths (token file, cache
# destination) land in the user's home: systemd sets $HOME and $USER for units
# that have User= set.
User=%i
Type=oneshot
# Bound the run so an unreachable chrome-service cannot hang the unit — the
# timer retries next hour.
TimeoutStartSec=60
ExecStart=/usr/local/bin/playwright-snapshot-refresh
StandardError=journal
StandardOutput=journal

[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,16 @@
[Unit]
Description=Hourly refresh of %i's playwright storage-state snapshot from chrome-service
After=network-online.target

[Timer]
# Service instance this timer activates (same instance name, i.e. same user).
Unit=playwright-snapshot-refresh@%i.service
# Fire once shortly after boot, so a freshly-booted box does not have to wait
# for the next :28 before its cache is populated.
OnBootSec=2min
# Then hourly at :28 — 5 minutes after the in-cluster snapshot-harvester CronJob
# (which runs at :23 every hour), so the file pulled is the freshest one.
OnCalendar=*-*-* *:28:00
RandomizedDelaySec=30
Persistent=true

[Install]
WantedBy=timers.target

View file

@ -21,6 +21,12 @@ from typing import Iterable
import yaml
BASE_PORT = 3773
# Per-user playwright-mcp HTTP port (the browser MCP each user's Claude sessions
# connect to). Distinct range from T3_PORT, allocated for EVERY roster user incl.
# the admin (wizard is listed). Sticky from existing, so the live in-session
# assignments (wizard 8931, emo 8932, ancamilea 8933) are preserved across
# reconciles once seeded; a fresh box allocates from 8931 in sorted order.
PLAYWRIGHT_BASE_PORT = 8931
VALID_TIERS = ("admin", "power-user", "namespace-owner")
# single - ~/code IS the locked infra clone (the original non-admin layout)
# workspace - ~/code is a plain directory of per-project clones; the locked
@ -82,6 +88,7 @@ class DesiredState:
ttyd_user_map: str
dispatch: dict[str, dict]
ports: dict[str, int]
playwright_ports: dict[str, int] = field(default_factory=dict)
@dataclass(frozen=True)
@ -203,13 +210,18 @@ def has_blocking_errors(issues: list[ValidationIssue]) -> bool:
# --------------------------------------------------------------------------
def _allocate_ports(roster: Roster, existing_ports: dict[str, int]) -> dict[str, int]:
def _allocate_ports(
roster: Roster, existing_ports: dict[str, int], base: int = BASE_PORT
) -> dict[str, int]:
"""Sticky port allocation: keep every roster user's existing port, then assign
each new user the next free port from `base`. Used for both T3_PORT (base 3773)
and the per-user playwright-mcp port (base 8931)."""
ports = {u: existing_ports[u] for u in roster.users if u in existing_ports}
used = set(ports.values())
for os_user in sorted(roster.users):
if os_user in ports:
continue
candidate = BASE_PORT
candidate = base
while candidate in used:
candidate += 1
ports[os_user] = candidate
@ -224,9 +236,14 @@ _TTYD_MAP_HEADER = (
def derive_desired_state(
roster: Roster, existing_ports: dict[str, int]
roster: Roster,
existing_ports: dict[str, int],
existing_playwright_ports: dict[str, int] | None = None,
) -> DesiredState:
ports = _allocate_ports(roster, existing_ports)
playwright_ports = _allocate_ports(
roster, existing_playwright_ports or {}, base=PLAYWRIGHT_BASE_PORT
)
ordered = sorted(roster.users.values(), key=lambda u: ports[u.os_user])
ttyd_lines = [f"{u.authentik_user}={u.os_user}" for u in ordered]
ttyd_user_map = _TTYD_MAP_HEADER + "\n".join(ttyd_lines) + "\n"
@ -246,7 +263,7 @@ def derive_desired_state(
)
for u in roster.users.values()
}
return DesiredState(accounts, ttyd_user_map, dispatch, ports)
return DesiredState(accounts, ttyd_user_map, dispatch, ports, playwright_ports)
def groups_to_add(desired: Iterable[str], current: Iterable[str]) -> list[str]:
@ -303,6 +320,7 @@ def _desired_state_to_dict(ds: DesiredState) -> dict:
"ttyd_user_map": ds.ttyd_user_map,
"dispatch": ds.dispatch,
"ports": ds.ports,
"playwright_ports": ds.playwright_ports,
}
@ -318,7 +336,11 @@ def _main(argv: list[str]) -> int:
pv.add_argument("--k8s-users-json", required=True, help="JSON map {k8s_user: tier}")
pd = sub.add_parser("derive", help="emit desired state as JSON")
pd.add_argument("--roster", required=True)
pd.add_argument("--ports-json", required=True, help="JSON map {os_user: port}")
pd.add_argument("--ports-json", required=True, help="JSON map {os_user: T3_PORT}")
pd.add_argument(
"--playwright-ports-json",
help="JSON map {os_user: PLAYWRIGHT_PORT} (optional; sticky allocation)",
)
args = parser.parse_args(argv)
roster = load_roster_file(args.roster)
@ -329,7 +351,12 @@ def _main(argv: list[str]) -> int:
print(f"{issue.severity.upper()}: {issue.message}", file=sys.stderr)
return 1 if has_blocking_errors(issues) else 0
with open(args.ports_json, encoding="utf-8") as fh:
desired = derive_desired_state(roster, json.load(fh))
existing_ports = json.load(fh)
existing_playwright_ports = {}
if args.playwright_ports_json:
with open(args.playwright_ports_json, encoding="utf-8") as fh:
existing_playwright_ports = json.load(fh)
desired = derive_desired_state(roster, existing_ports, existing_playwright_ports)
json.dump(_desired_state_to_dict(desired), sys.stdout, indent=2, sort_keys=True)
sys.stdout.write("\n")
return 0

View file

@ -144,6 +144,16 @@ if command -v vault >/dev/null; then
else
log "WARN: secret/workstation codex_shared_auth_json absent -> shared Codex auth not staged"
fi
# 8c) chrome-service snapshot bearer token -> root file the provisioner copies
# per-user (if-absent) to ~/.config/playwright/token, which the per-user
# playwright-snapshot-refresh reads. One token for all users (single shared
# warm profile, by design). 0600: the snapshot it fetches holds cookies.
if cs_tok="$(vault kv get -field=api_bearer_token secret/chrome-service 2>/dev/null)"; then
install -m 0600 /dev/stdin /etc/t3-serve/chrome-service-token <<<"$cs_tok"
log "staged /etc/t3-serve/chrome-service-token (playwright snapshot auth)"
else
log "WARN: secret/chrome-service api_bearer_token absent -> playwright snapshot refresh will 401"
fi
fi
# 9) service layer: install + enable the machine-wide systemd units (sources in
@ -181,6 +191,16 @@ for u in t3-serve@.service \
t3-dispatch.service; do
install -m 0644 "$SCRIPTS/$u" "/etc/systemd/system/$u"
done
# 9e) per-user playwright-mcp browser MCP: system-level TEMPLATE units (one
# instance per OS user) + the snapshot-refresh script. Reproducible-from-git
# replacement for the hand-made ~/.config/systemd/user/playwright-* units
# (no systemd --user / linger needed). Enabled per-user by the provisioner;
# PLAYWRIGHT_PORT (roster_engine) + the chrome-service token (8c) feed them.
install -m 0755 "$HERE/playwright/playwright-snapshot-refresh" /usr/local/bin/playwright-snapshot-refresh
for u in playwright-mcp@.service playwright-snapshot-refresh@.service playwright-snapshot-refresh@.timer; do
install -m 0644 "$HERE/playwright/$u" "/etc/systemd/system/$u"
done
log "playwright: template units + snapshot-refresh script installed (per-user enable in provisioner)"
systemctl daemon-reload
systemctl enable --now t3-dispatch.service \
t3-autoupdate.timer t3-backup-state.timer t3-provision-users.timer >/dev/null 2>&1 || \

View file

@ -296,6 +296,53 @@ def test_derive_is_deterministic():
)
# --------------------------------------------------------------------------
# derive_desired_state: per-user playwright-mcp ports (reproducible browser MCP)
# --------------------------------------------------------------------------
# wizard (admin) IS a roster user, so playwright ports are allocated for every
# user incl. the admin, from PLAYWRIGHT_BASE_PORT=8931. The live in-session
# assignment is wizard 8931, emo 8932, ancamilea 8933.
LIVE_PLAYWRIGHT_PORTS = {"wizard": 8931, "emo": 8932, "ancamilea": 8933}
def test_derive_allocates_playwright_ports_for_all_users_incl_admin():
    """With no prior assignments, every roster user (admin included) gets a port."""
    desired = eng.derive_desired_state(_roster(THREE), {})
    # Fresh box: ports are handed out in sorted os_user order
    # (ancamilea, emo, wizard), starting at 8931.
    expected = {"ancamilea": 8931, "emo": 8932, "wizard": 8933}
    assert desired.playwright_ports == expected
def test_derive_preserves_existing_sticky_playwright_ports():
    """Seeding with the live assignment must leave every user's port unchanged."""
    seeded = LIVE_PLAYWRIGHT_PORTS
    desired = eng.derive_desired_state(
        _roster(THREE), {}, existing_playwright_ports=seeded
    )
    # Preserved exactly: nobody's port moves.
    assert desired.playwright_ports == seeded
def test_derive_allocates_next_free_playwright_port_for_new_user():
    """Existing users keep their ports; a new user takes the next free one."""
    already_assigned = {"wizard": 8931, "emo": 8932}
    allocated = eng.derive_desired_state(
        _roster(THREE), {}, existing_playwright_ports=already_assigned
    ).playwright_ports
    # Sticky entries are untouched.
    assert allocated["wizard"] == 8931
    assert allocated["emo"] == 8932
    # The brand-new user lands on the next free port, skipping 8931/8932.
    assert allocated["ancamilea"] == 8933
def test_playwright_ports_are_disjoint_from_t3_ports():
    """No port may appear in both the T3 map and the playwright map."""
    desired = eng.derive_desired_state(_roster(THREE), LIVE_PORTS, LIVE_PLAYWRIGHT_PORTS)
    t3_ports = set(desired.ports.values())
    playwright_ports = desired.playwright_ports.values()
    assert t3_ports.isdisjoint(playwright_ports)
def test_desired_state_dict_includes_playwright_ports():
    """The dict form of the desired state carries the playwright port map."""
    # The JSON adapter is the contract the bash provisioner consumes via jq.
    desired = eng.derive_desired_state(_roster(THREE), {}, LIVE_PLAYWRIGHT_PORTS)
    as_dict = eng._desired_state_to_dict(desired)
    assert as_dict["playwright_ports"] == LIVE_PLAYWRIGHT_PORTS
# --------------------------------------------------------------------------
# groups_to_add: the additive-only invariant (module #1)
# --------------------------------------------------------------------------