Add per-user Claude auth renewal

Each workstation user needs a continuously valid Claude token under their own Enterprise identity. Store only that user's OAuth state in an isolated Vault path, renew and verify it automatically, recover from Vault when possible, and alert when interactive SSO is required.
This commit is contained in:
Viktor Barzin 2026-06-20 20:10:40 +00:00
parent 834c5e6a2a
commit 5549fc3672
11 changed files with 408 additions and 28 deletions

View file

@ -0,0 +1,153 @@
#!/usr/bin/env bash
# Keep one Workstation user's Claude subscription OAuth credentials recoverable.
# Claude owns access/refresh-token rotation in ~/.claude/.credentials.json. This
# helper validates auth with real inference, stores only the claudeAiOauth object
# in the user's isolated Vault path, and attempts one restore on failure.
# Strict mode: stop on unhandled command failures (-e), on expansion of unset
# variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Account whose credentials are managed; CLAUDE_AUTH_USER overrides `id -un`.
CAS_USER="${CLAUDE_AUTH_USER:-$(id -un)}"
# Base of every default path below; the script aborts here when HOME is unset or empty.
CAS_HOME="${HOME:?HOME must be set}"
# Claude's credentials file (override: CLAUDE_CREDENTIALS_FILE).
CAS_CREDENTIALS="${CLAUDE_CREDENTIALS_FILE:-$CAS_HOME/.claude/.credentials.json}"
# Directory holding this helper's configuration (override: CLAUDE_AUTH_CONFIG_DIR).
CAS_CONFIG_DIR="${CLAUDE_AUTH_CONFIG_DIR:-$CAS_HOME/.config/claude-auth-sync}"
# File the Vault token is read from (override: CLAUDE_AUTH_VAULT_TOKEN_FILE).
CAS_VAULT_TOKEN_FILE="${CLAUDE_AUTH_VAULT_TOKEN_FILE:-$CAS_CONFIG_DIR/vault-token}"
# Vault KV path of the OAuth backup; the default is keyed by CAS_USER
# (override: CLAUDE_AUTH_VAULT_PATH).
CAS_VAULT_PATH="${CLAUDE_AUTH_VAULT_PATH:-secret/workstation/claude-users/$CAS_USER}"
# Directory for the sync log and the run lock (override: CLAUDE_AUTH_STATE_DIR).
CAS_STATE_DIR="${CLAUDE_AUTH_STATE_DIR:-$CAS_HOME/.local/state/claude-auth-sync}"
# Log file that cas_log appends to.
CAS_LOG="$CAS_STATE_DIR/sync.log"
# Record one status line in the sync log and, best effort, in syslog.
# Globals:   CAS_STATE_DIR, CAS_LOG, CAS_USER (read)
# Arguments: the message words; they are joined with single spaces
# Outputs:   appends "<ISO-8601 timestamp> <message>" to $CAS_LOG
cas_log() {
  local message="$*"
  # The state directory may not exist yet on the first run.
  mkdir -p "$CAS_STATE_DIR"
  printf '%s %s\n' "$(date -Is)" "$message" >>"$CAS_LOG"
  # Syslog is optional: a missing or failing logger must never fail the sync.
  logger -t claude-auth-sync -- "user=$CAS_USER $message" 2>/dev/null || true
}
# Emit the claudeAiOauth object of a credentials file as compact JSON.
# Arguments: $1 - path of the credentials JSON file
# Outputs:   the object on stdout; nothing when it is absent or incomplete
# Returns:   0 only when accessToken and refreshToken are non-empty strings and
#            expiresAt is a number; non-zero otherwise. No token text is logged.
cas_oauth_from_credentials() {
  local credentials_file="$1"
  jq -ce '
    def nonempty_string: type == "string" and length > 0;
    .claudeAiOauth
    | select(
        (.accessToken | nonempty_string)
        and (.refreshToken | nonempty_string)
        and ((.expiresAt | type) == "number")
      )
  ' "$credentials_file"
}
# Merge a recovered OAuth object while preserving unrelated credentials (MCP OAuth).
# Arguments:
#   $1 - path of the credentials JSON file used as the merge base
#   $2 - OAuth object as JSON text
# Outputs:   the base document with .claudeAiOauth replaced, as compact JSON on stdout
# Returns:   jq's exit status; non-zero when the base file or the OAuth text
#            cannot be parsed as JSON.
cas_merge_oauth() {
  local credentials="$1" oauth="$2"
  # Hand the token material to jq through its environment rather than
  # --argjson: command-line arguments show up in process listings that other
  # local users can read, whereas a process environment is normally readable
  # only by its owner and root.
  CAS_OAUTH_JSON="$oauth" \
    jq -ce '.claudeAiOauth = (env.CAS_OAUTH_JSON | fromjson)' "$credentials"
}
# Decide whether a Vault token is this user's scoped Claude-auth token.
# Globals:   CAS_USER (read)
# Arguments:
#   $1 - display name of the token
#   $2 - comma-separated list of the token's policies
# Returns:   0 when the display name is exactly token-devvm-claude-auth-<user>
#            and the list contains the element workstation-claude-<user>;
#            1 otherwise.
cas_vault_identity_ok() {
  local display_name="$1" policies_csv="$2"
  [[ "$display_name" == "token-devvm-claude-auth-$CAS_USER" ]] || return 1
  # Literal whole-element comparison. The quoted pattern keeps characters such
  # as "." in a user name from acting as wildcards (a regex match would accept
  # workstation-claude-joXhn for user jo.hn), and no pipeline is involved, so
  # pipefail cannot turn an early-exiting reader into a false failure.
  [[ ",$policies_csv," == *",workstation-claude-$CAS_USER,"* ]]
}
# Load the scoped Vault token, confirm it is this user's, and renew it.
# Globals:   CAS_VAULT_TOKEN_FILE (read); VAULT_ADDR, VAULT_TOKEN (set and exported)
# Returns:   0 once lookup, identity check and renewal have all succeeded;
#            1 after logging which step failed.
cas_prepare_vault() {
  local lookup_json token_name policy_csv

  if [[ ! -s "$CAS_VAULT_TOKEN_FILE" ]]; then
    cas_log "FAIL missing scoped Vault token; admin must run workstation provisioning"
    return 1
  fi

  # An address already present in the environment wins over the default.
  VAULT_ADDR="${VAULT_ADDR:-https://vault.viktorbarzin.me}"
  VAULT_TOKEN="$(<"$CAS_VAULT_TOKEN_FILE")"
  export VAULT_ADDR VAULT_TOKEN

  if ! lookup_json="$(vault token lookup -format=json 2>/dev/null)"; then
    cas_log "FAIL scoped Vault token lookup failed"
    return 1
  fi

  token_name="$(jq -r '.data.display_name // ""' <<<"$lookup_json")"
  # Token policies and identity policies are checked as one combined list.
  policy_csv="$(jq -r '((.data.policies // []) + (.data.identity_policies // [])) | join(",")' <<<"$lookup_json")"

  if ! cas_vault_identity_ok "$token_name" "$policy_csv"; then
    cas_log "FAIL scoped Vault token drift detected; refusing foreign token"
    return 1
  fi

  if ! vault token renew -format=json >/dev/null 2>&1; then
    cas_log "FAIL scoped Vault token renewal failed"
    return 1
  fi
}
# auth status is not authoritative: it reported loggedIn=true during a real 401
# on 2026-06-20. A tiny, non-persistent inference is the feedback loop.
cas_live_auth_ok() {
local out
out="$(timeout 60 claude -p 'Reply with exactly AUTH_OK and nothing else.' \
--model haiku --max-turns 1 --no-session-persistence --tools "" \
--disable-slash-commands --setting-sources "" 2>/dev/null)" || return 1
[[ "$out" == "AUTH_OK" ]]
}
# Store the current local Claude OAuth object at the user's Vault path.
# Globals:   CAS_CREDENTIALS, CAS_VAULT_PATH (read)
# Outputs:   one status line via cas_log
# Returns:   0 after a successful write; 1 when the local credential is absent
#            or malformed, or when the Vault write fails.
cas_backup() {
  local oauth expires
  oauth="$(cas_oauth_from_credentials "$CAS_CREDENTIALS")" || {
    cas_log "FAIL local Claude OAuth credential is absent or malformed"
    return 1
  }
  expires="$(jq -r '.expiresAt' <<<"$oauth")"
  # The OAuth JSON travels on stdin ("key=-") instead of as a key=value
  # argument: command-line arguments are visible in process listings to other
  # users of the workstation. printf is a shell builtin, so no process is ever
  # started with the token in its argv. Expiry and timestamp are not secret.
  printf '%s' "$oauth" | vault kv put "$CAS_VAULT_PATH" \
    claude_ai_oauth_json=- \
    credential_expires_at_ms="$expires" \
    backed_up_at="$(date -Is)" >/dev/null || {
    cas_log "FAIL Vault credential backup failed"
    return 1
  }
  cas_log "OK Claude auth valid; refreshed OAuth state backed up to Vault"
}
# Replace the local Claude OAuth object with the copy stored in Vault.
# Globals:   CAS_VAULT_PATH, CAS_CREDENTIALS (read)
# Outputs:   rewrites $CAS_CREDENTIALS with mode 0600; one status line via cas_log
# Returns:   0 after the credentials file has been replaced; 1 when Vault has
#            no usable copy or the merge fails (the existing file is then
#            left as it was).
cas_restore() {
  local vault_oauth merge_src staged

  if ! vault_oauth="$(vault kv get -field=claude_ai_oauth_json "$CAS_VAULT_PATH" 2>/dev/null)"; then
    cas_log "FAIL no recoverable Claude OAuth credential in Vault"
    return 1
  fi

  # Same shape check as for the local file: two non-empty token strings and a
  # numeric expiry.
  if ! jq -e 'select((.accessToken | type) == "string" and (.accessToken | length) > 0)
    | select((.refreshToken | type) == "string" and (.refreshToken | length) > 0)
    | select((.expiresAt | type) == "number")' <<<"$vault_oauth" >/dev/null; then
    cas_log "FAIL Vault Claude OAuth credential is malformed"
    return 1
  fi

  mkdir -p "$(dirname "$CAS_CREDENTIALS")"

  # Merge into the existing file when it holds a JSON object; otherwise start
  # from an empty object kept in a throwaway temp file.
  merge_src="$CAS_CREDENTIALS"
  if ! jq -e 'type == "object"' "$CAS_CREDENTIALS" >/dev/null 2>&1; then
    merge_src="$(mktemp)"
    printf '{}\n' >"$merge_src"
  fi

  # Stage next to the target, then rename over it.
  staged="$(mktemp "${CAS_CREDENTIALS}.XXXXXX")"
  if ! cas_merge_oauth "$merge_src" "$vault_oauth" >"$staged"; then
    rm -f "$staged"
    [[ "$merge_src" == "$CAS_CREDENTIALS" ]] || rm -f "$merge_src"
    cas_log "FAIL could not merge Vault Claude OAuth credential"
    return 1
  fi

  chmod 0600 "$staged"
  mv "$staged" "$CAS_CREDENTIALS"
  [[ "$merge_src" == "$CAS_CREDENTIALS" ]] || rm -f "$merge_src"
  cas_log "RECOVERED restored Claude OAuth state from Vault"
}
# Entry point: verify live Claude auth and back it up, restoring once from
# Vault when the live check fails.
# Globals:   CAS_STATE_DIR (read); file descriptor 9 is opened on the lock file
# Returns:   0 when auth is valid and backed up, or when another run holds the
#            lock; 1 when a dependency is missing, Vault preparation fails, or
#            auth is still invalid after the restore attempt.
cas_main() {
  # Keep the loop variable out of the caller's scope; this file may be sourced.
  local bin
  # Every file created from here on is private to the user.
  umask 077
  for bin in jq vault claude timeout flock; do
    command -v "$bin" >/dev/null || { cas_log "FAIL missing dependency: $bin"; return 1; }
  done
  mkdir -p "$CAS_STATE_DIR"
  # Non-blocking lock so overlapping runs do not race on the credentials file.
  exec 9>"$CAS_STATE_DIR/lock"
  flock -n 9 || { cas_log "SKIP another sync is already running"; return 0; }
  cas_prepare_vault || return 1
  if cas_live_auth_ok; then
    cas_backup
    return
  fi
  cas_log "WARN live Claude auth failed; attempting one Vault restore"
  cas_restore || return 1
  # Back up only after the restored credential has proven itself live.
  if cas_live_auth_ok; then
    cas_backup
    return
  fi
  cas_log "FAIL Claude auth still invalid after Vault restore; interactive SSO login required"
  return 1
}
# Run the sync only when this file is executed directly; when it is sourced,
# the functions above are defined and nothing runs.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  cas_main "$@"
fi

View file

@ -125,14 +125,10 @@ if command -v vault >/dev/null; then
if [[ -z "${VAULT_TOKEN:-}" && -r /home/wizard/.vault-token ]]; then
VAULT_TOKEN="$(cat /home/wizard/.vault-token)"; export VAULT_TOKEN
fi
# 8a) Shared Claude subscription OAuth token (long-lived sk-ant-oat01) -> root file the
# provisioner injects into non-admins' t3-serve env (only those without their own login).
if claude_tok="$(vault kv get -field=claude_oauth_token secret/workstation 2>/dev/null)"; then
install -m 0600 /dev/stdin /etc/t3-serve/claude-oauth-token <<<"$claude_tok"
log "staged /etc/t3-serve/claude-oauth-token (shared Claude subscription)"
else
log "WARN: secret/workstation claude_oauth_token absent -> non-admins won't share Claude auth"
fi
# 8a) Claude auth is deliberately NOT shared. Each roster user signs in with their own
# Enterprise identity; claude-auth-sync backs up only their OAuth object to an
# isolated Vault path. The provisioner mints its scoped Vault token when this admin
# VAULT_TOKEN is present.
# 8b) Shared Codex auth -> /opt/codex-shared/auth.json (the codex wrapper symlinks each
# user's ~/.codex/auth.json here). Previously a manual host change that did NOT survive
# a rebuild even though the Vault key existed — now reproducible from Vault.
@ -166,6 +162,7 @@ SCRIPTS="$HERE/.."
install -m 0755 "$SCRIPTS/t3-autoupdate.sh" /usr/local/bin/t3-autoupdate
install -m 0755 "$SCRIPTS/t3-backup-state.sh" /usr/local/bin/t3-backup-state
install -m 0755 "$SCRIPTS/t3-mint" /usr/local/bin/t3-mint
install -m 0755 "$HERE/claude-auth-sync.sh" /usr/local/bin/claude-auth-sync
# 9b) t3-dispatch: unprivileged system account + compiled Go binary (build-if-absent)
id -u t3-dispatch >/dev/null 2>&1 || useradd --system --no-create-home --shell /usr/sbin/nologin t3-dispatch
if [[ ! -x /usr/local/bin/t3-dispatch ]]; then
@ -197,12 +194,14 @@ fi
# 9d) unit files + enablement. Timers self-heal; t3-dispatch is long-running.
# t3-serve@ is a TEMPLATE (enabled per-user by the provisioner, not here).
for u in t3-serve@.service \
claude-auth-sync@.service claude-auth-sync@.timer \
t3-autoupdate.service t3-autoupdate.timer \
t3-backup-state.service t3-backup-state.timer \
t3-provision-users.service t3-provision-users.timer \
t3-dispatch.service; do
install -m 0644 "$SCRIPTS/$u" "/etc/systemd/system/$u"
done
log "claude auth: per-user sync script + template units installed"
# 9e) per-user playwright-mcp browser MCP: system-level TEMPLATE units (one
# instance per OS user) + the snapshot-refresh script. Reproducible-from-git
# replacement for the hand-made ~/.config/systemd/user/playwright-* units
@ -219,4 +218,11 @@ systemctl enable --now t3-dispatch.service \
log "WARN: some units failed to enable (check: systemctl status t3-dispatch t3-*.timer)"
log "service units installed + enabled (t3-dispatch + 3 timers; t3-serve@ per-user)"
# Run one foreground reconcile while the admin Vault token borrowed in section 8
# is still available. This is what mints new roster users' isolated periodic
# Vault tokens; the hourly no-admin-token reconcile only maintains existing ones.
if [[ -n "${VAULT_TOKEN:-}" ]]; then
/usr/local/bin/t3-provision-users || log "WARN: foreground provisioner failed; scoped Claude-auth tokens may need a retry"
fi
log "OK (idempotent)"