diff --git a/.claude/reference/service-catalog.md b/.claude/reference/service-catalog.md
index 51d50c79..c9a7638c 100644
--- a/.claude/reference/service-catalog.md
+++ b/.claude/reference/service-catalog.md
@@ -32,7 +32,7 @@
 |---------|-------------|-------|
 | k8s-dashboard | Kubernetes dashboard | k8s-dashboard |
 | reverse-proxy | Generic reverse proxy | reverse-proxy |
-| t3code | Multi-user coding-agent GUI at t3.viktorbarzin.me. `auth=required` (Authentik) → DevVM `t3-dispatch` service (`10.0.10.10:3780`, unprivileged user) maps `X-authentik-username` → that user's own `t3-serve@` instance (file perms enforced by uid; wizard→:3773, emo→:3774; unmapped→403) and **auto-injects the t3 session on first visit** (mints via the root `t3-mint` wrapper, scoped sudoers → `/api/auth/bootstrap` `t3_session` cookie). Source of truth `/etc/ttyd-user-map`; `t3-provision-users` reconcile (systemd timer) turns map entries into `t3-serve@` instances + `dispatch.json`. **Add a user:** one line in `/etc/ttyd-user-map` (must already be an OS account + Authentik identity) → reconcile. DevVM artifacts versioned in `infra/scripts/` (`t3-serve@.service`, `t3-provision-users`, `t3-dispatch/`, `t3-mint`, `sudoers-t3-autopair`); TF (`stacks/t3code`) owns only the ingress + Endpoints→:3780. Native app/app.t3.codes unsupported (cross-origin) — deferred until published. Design: `docs/plans/2026-06-01-t3-auto-provision-*`. | t3code |
+| t3code | Multi-user coding-agent GUI at t3.viktorbarzin.me. `auth=required` (Authentik) → DevVM `t3-dispatch` service (`10.0.10.10:3780`, unprivileged user) maps `X-authentik-username` → that user's own `t3-serve@` instance (file perms enforced by uid; wizard→:3773, emo→:3774; unmapped→403) and **auto-injects the t3 session on first visit** (mints via the root `t3-mint` wrapper, scoped sudoers → `/api/auth/bootstrap` `t3_session` cookie). Source of truth `/etc/ttyd-user-map`; `t3-provision-users` reconcile (systemd timer) turns map entries into `t3-serve@` instances + `dispatch.json`. **Add a user:** one line in `/etc/ttyd-user-map` (must already be an OS account + Authentik identity) → reconcile. DevVM artifacts versioned in `infra/scripts/` (`t3-serve@.service`, `t3-provision-users`, `t3-dispatch/`, `t3-mint`, `sudoers-t3-autopair`, `t3-autoupdate.*`); TF (`stacks/t3code`) owns only the ingress + Endpoints→:3780. **t3 binary tracks `nightly`** via `t3-autoupdate` (daily systemd timer; health-check + auto-rollback on a bad build; restarts only idle instances) — so new models (e.g. Opus 4.8) land as t3 ships them. Native app/app.t3.codes unsupported (cross-origin) — deferred until published. Design: `docs/plans/2026-06-01-t3-auto-provision-*`. | t3code |
 
 ## Active Use
 | Service | Description | Stack |
diff --git a/scripts/t3-autoupdate.service b/scripts/t3-autoupdate.service
new file mode 100644
index 00000000..d3306da7
--- /dev/null
+++ b/scripts/t3-autoupdate.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=Track latest t3 nightly (health-checked, idle-only restart)
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/t3-autoupdate
diff --git a/scripts/t3-autoupdate.sh b/scripts/t3-autoupdate.sh
new file mode 100644
index 00000000..962f3fc4
--- /dev/null
+++ b/scripts/t3-autoupdate.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# Track the latest t3 nightly — with a health-check + auto-rollback (lesson from
+# the Keel auto-update incidents: never blindly trust a new build) and idle-only
+# restarts (never kill an in-flight coding session). Runs as root via the unit.
+#
+# Exit status: 0 = nothing to do, npm install failed (staying put), or update
+# applied; 1 = the new build was rejected and a rollback was attempted.
+# No `set -e` on purpose: each failure below is handled and logged explicitly.
+set -uo pipefail
+
+readonly SMOKE_PORT=3799 # throwaway port used only by the health-check
+smoke_pid='' # PID of the smoke-test server while it is running
+smoke_dir='' # temporary --base-dir of the smoke-test server
+
+LOG() { logger -t t3-autoupdate "$*"; echo "t3-autoupdate: $*"; }
+
+# Print the installed t3 version without a leading "v"; prints nothing when
+# `t3 --version` fails or is silent.
+ver() { t3 --version 2>/dev/null | awk '{print $NF}' | sed 's/^v//'; }
+
+# Stop the smoke-test server (if any) and delete its base dir. Also installed
+# as the EXIT trap so an early exit leaves neither behind.
+cleanup() {
+  if [[ -n "$smoke_pid" ]]; then
+    kill "$smoke_pid" 2>/dev/null
+    wait "$smoke_pid" 2>/dev/null
+    smoke_pid=''
+  fi
+  if [[ -n "$smoke_dir" ]]; then
+    rm -rf -- "$smoke_dir"
+    smoke_dir=''
+  fi
+}
+trap cleanup EXIT
+
+# Reinstall the previous version ($1, may be empty), log the outcome, exit 1.
+rollback() {
+  local prev=$1
+  if [[ -n "$prev" ]] && npm i -g "t3@$prev" >/dev/null 2>&1; then
+    LOG "rolled back to $prev"
+  else
+    LOG "ROLLBACK FAILED — manual fix needed (t3 may be broken)"
+  fi
+  exit 1
+}
+
+# Serve the NEW binary on a throwaway port/base-dir and poll it (up to 15
+# tries). Returns 0 once / answers HTTP 200 while the server is still alive.
+health_check() {
+  local code i
+  if ! smoke_dir=$(mktemp -d); then
+    smoke_dir=''
+    LOG "mktemp failed; cannot health-check"
+    return 1
+  fi
+  t3 serve --host 127.0.0.1 --port "$SMOKE_PORT" --base-dir "$smoke_dir" >/dev/null 2>&1 &
+  smoke_pid=$!
+  for ((i = 0; i < 15; i++)); do
+    code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://127.0.0.1:$SMOKE_PORT/" 2>/dev/null)
+    # If the server has exited, a 200 can only come from some other process
+    # on the port — never count that as a pass, and stop polling early.
+    kill -0 "$smoke_pid" 2>/dev/null || return 1
+    [[ "$code" == "200" ]] && return 0
+    sleep 2
+  done
+  return 1
+}
+
+before=$(ver); LOG "current: ${before:-unknown}"
+npm i -g t3@nightly >/dev/null 2>&1 || { LOG "npm install failed; staying on ${before:-current}"; exit 0; }
+after=$(ver)
+
+if [[ -z "$after" ]]; then
+  # npm succeeded but the binary cannot report a version: that is a broken
+  # build, not "already latest" — do not leave it installed.
+  LOG "installed build reports no version — rolling back to ${before:-unknown}"
+  rollback "$before"
+fi
+if [[ "$after" == "$before" ]]; then
+  LOG "already latest ($before); nothing to do"; exit 0
+fi
+LOG "installed $after (was $before); health-checking…"
+
+health_check; health_rc=$?
+cleanup # stop the smoke server before rolling back or restarting anything
+if ((health_rc != 0)); then
+  LOG "HEALTH-CHECK FAILED for $after — rolling back to $before"
+  rollback "$before"
+fi
+LOG "health OK; restarting idle instances"
+
+# Restart only IDLE per-user instances; defer any with an active agent child.
+# NOTE: only direct children of MainPID are inspected, and a deferred instance
+# is revisited only by a later run that installs a newer build (a run that
+# finds nothing new exits before reaching this loop).
+while read -r unit _ <&3; do
+  [[ -n "$unit" ]] || continue
+  pid=$(systemctl show -p MainPID --value "$unit")
+  children=''
+  [[ -n "$pid" && "$pid" != 0 ]] && children=$(pgrep -aP "$pid" 2>/dev/null)
+  # Match against captured output instead of piping pgrep into `grep -q`:
+  # under pipefail a SIGPIPE'd pgrep would make a busy instance look idle.
+  if grep -qiE 'claude|codex|opencode' <<<"$children"; then
+    LOG "deferring $unit (active agent) — updates next cycle when idle"
+  elif systemctl restart "$unit"; then
+    LOG "restarted $unit -> $after"
+  else
+    LOG "FAILED to restart $unit"
+  fi
+done 3< <(systemctl list-units --type=service --state=running --no-legend 't3-serve@*')
+LOG "update complete: $after"
diff --git a/scripts/t3-autoupdate.timer b/scripts/t3-autoupdate.timer
new file mode 100644
index 00000000..a59135f7
--- /dev/null
+++ b/scripts/t3-autoupdate.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Daily t3 nightly auto-update
+
+[Timer]
+OnCalendar=*-*-* 04:00:00
+RandomizedDelaySec=1h
+Persistent=true
+
+[Install]
+WantedBy=timers.target