claude-breakglass: in-cluster warm break-glass UI for the devvm

Stand up the infra for Viktor's break-glass: when the devvm is wedged (cluster
healthy), open breakglass.viktorbarzin.me, have Claude SSH in to diagnose/fix,
and power-cycle VM 102 via the Proxmox host if needed. App half landed in the
claude-agent-service repo.

New stack stacks/claude-breakglass/ — own namespace + SA, NO Vault role (ESO
syncs only its key, so the pod has zero direct Vault access). Hardened to
survive the pressure it exists to fix: priorityClassName tier-0-core, broad
node-pressure tolerations, anti-affinity off node1, imagePullPolicy Always.
auth="required" ingress so it rides the Authentik resilience proxy and stays
reachable via the basic-auth fallback during an auth-stack outage. Runs the
shared claude-agent-service image with the breakglass entrypoint.
files/breakglass-pve is the PVE forced-command (status|forensics|reset|stop|
start|cycle on VM 102, forensics-first).

Isolation: the shared claude-agent pod's terraform-state Vault policy is
explicitly DENIED secret/claude-breakglass/* (stacks/vault/main.tf) so a
prompt-injected agent on that pod can't read the root-on-devvm key.

traefik: add a checksum/auth-proxy-htpasswd annotation so the auth-proxy rolls
when the emergency basic-auth password rotates (it's a subPath mount that
doesn't auto-update) — regenerated this session so Viktor has a known
emergency credential, which the auth-stack-outage failure domain requires.

Docs: docs/runbooks/breakglass-ui.md (full incident + bootstrap procedure,
incl. the per-host from= NAT quirks) and a security.md note recording the two
new privileged footholds.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-12 21:40:17 +00:00
parent 02785987dd
commit 32cf75635f
7 changed files with 629 additions and 0 deletions

View file

@ -0,0 +1,115 @@
#!/bin/bash
# breakglass-pve — forced-command wrapper for the in-cluster claude-breakglass
# service. Installed in the Proxmox host's /root/.ssh/authorized_keys behind a
# command="/usr/local/bin/breakglass-pve",restrict,from="<cluster CIDRs>"
# entry, so the breakglass SSH key can ONLY run the verbs below against VM 102
# (the devvm) — never a free shell on the hypervisor.
#
# The requested verb arrives in $SSH_ORIGINAL_COMMAND. Anything that is not a
# single bare verb from the allowlist is rejected and logged. Every MUTATING
# verb captures forensics first, unconditionally, so an erroneous reset never
# destroys the evidence of why the devvm was wedged.
#
# Deployed via scp (see docs/runbooks/breakglass-ui.md); not Terraform-managed
# (PVE host config is out-of-band, like fan-control / pve-nfs-exports).
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# The only VM this wrapper acts on.
VMID=102
# Host-side log file; requests and forensics captures are appended to it.
LOG=/var/log/breakglass-pve.log

# ts — print the current UTC time as YYYY-MM-DDTHH:MM:SSZ.
ts() { date -u +%Y-%m-%dT%H:%M:%SZ; }

# log MESSAGE... — append one timestamped line to $LOG.
# Control characters (newline, CR, escape, ...) are replaced with '?' so that
# caller-supplied text, such as a rejected verb, cannot forge extra log lines.
# Best-effort by design: a failed write is ignored so it never blocks a verb.
log() {
  local msg="$*"
  msg="${msg//[[:cntrl:]]/?}"
  printf '%s\n' "$(ts) [breakglass-pve] $msg" >>"$LOG" 2>/dev/null || true
}

# The requested verb as received from the SSH client (empty when none given).
verb="${SSH_ORIGINAL_COMMAND:-}"
# First space-separated field of SSH_CLIENT. Defaulted so that `set -u` does
# not abort the script when SSH_CLIENT is absent (e.g. a manual test run).
src="${SSH_CLIENT:-unknown}"
src="${src%% *}"
# Security boundary of the forced command: the request must be exactly one of
# the allowlisted verbs. Arguments, shell metacharacters or a second VMID make
# the string differ from every pattern, so they land in the reject arm.
case "$verb" in
  status | forensics | reset | stop | start | cycle)
    # Accepted; execution continues past this check.
    ;;
  *)
    log "REJECTED verb='$verb' from=$src"
    printf '%s\n' "breakglass-pve: rejected '$verb'. allowed: status|forensics|reset|stop|start|cycle (VM $VMID only)" >&2
    exit 2
    ;;
esac
# forensics — print a point-in-time report about VM $VMID and the host.
# Every probe is best-effort: a failing probe prints its error output (or a
# placeholder) and the report carries on, so the function always reaches the
# closing marker.
forensics() {
  printf '%s\n' "=== breakglass forensics $(ts) — VM $VMID on $(hostname) ==="

  printf '%s\n' "--- qm status ---"
  qm status "$VMID" 2>&1 || true

  printf '%s\n' "--- qm config ---"
  qm config "$VMID" 2>&1 || true

  printf '%s\n' "--- qm pending (staged) ---"
  qm pending "$VMID" 2>&1 || true

  printf '%s\n' "--- guest agent ping ---"
  if ! timeout 5 qm agent "$VMID" ping 2>&1; then
    printf '%s\n' "(no guest-agent response)"
  fi

  # Monitor commands are fed on stdin and capped at 5 seconds each.
  printf '%s\n' "--- qmp query-status ---"
  printf '%s\n' "info status" | timeout 5 qm monitor "$VMID" 2>&1 || true

  printf '%s\n' "--- qmp block jobs ---"
  printf '%s\n' "info block-jobs" | timeout 5 qm monitor "$VMID" 2>&1 || true

  printf '%s\n' "--- host uptime/load ---"
  uptime 2>&1 || true

  printf '%s\n' "--- host memory ---"
  free -h 2>&1 || true

  # Two iostat samples one second apart, printed from the fourth output line.
  printf '%s\n' "--- host io (1s) ---"
  if ! command -v iostat >/dev/null || ! iostat -dx 1 2 2>/dev/null | tail -n +4; then
    printf '%s\n' "(iostat unavailable)"
  fi

  printf '%s\n' "=== end forensics ==="
}
# wait_stopped TIMEOUT — poll `qm status` every 2 seconds until it reports
# 'status: stopped' or roughly TIMEOUT seconds have been spent waiting.
# Returns 0 as soon as the VM is seen stopped, 1 if the budget runs out.
wait_stopped() {
  local timeout="$1" elapsed=0
  while ((elapsed < timeout)); do
    if qm status "$VMID" 2>/dev/null | grep -q 'status: stopped'; then
      return 0
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done
  return 1
}
# run_verb VERB — execute one verb against VM $VMID.
#
#   status      print `qm status`.
#   forensics   print the forensics report only.
#   stop | reset | start | cycle
#               capture forensics first (echoed to the caller and appended to
#               $LOG), then perform the action and log completion.
#
# Any other value matches no arm and is a no-op. Reads the globals VMID, LOG
# and src, and relies on the log, ts, forensics and wait_stopped helpers.
run_verb() {
  local verb="$1" report pid

  log "verb=$verb from=$src"
  case "$verb" in
    status)
      qm status "$VMID"
      ;;
    forensics)
      forensics
      ;;
    stop | reset | start | cycle)
      # Forensics-first: emit to the caller AND persist on the host.
      report="$(forensics)"
      printf '%s\n' "$report"
      printf '%s\n' "$report" | sed "s/^/$(ts) [forensics] /" >>"$LOG" 2>/dev/null || true
      case "$verb" in
        start)
          qm start "$VMID"
          ;;
        reset)
          # Warm reset — reuses the QEMU process. Does NOT apply staged config.
          qm reset "$VMID"
          ;;
        stop)
          qm stop "$VMID"
          ;;
        cycle)
          # Cold stop->start: spawns a FRESH QEMU process, so staged config
          # (qm pending) is applied — the fix class for the 2026-06-11 I/O stall.
          # If a wedged QEMU ignores a clean stop, escalate to killing the
          # process (matches the 2026-06-11 manual recovery), then start.
          echo "$(ts) cycle: requesting clean stop of VM $VMID"
          qm stop "$VMID" >/dev/null 2>&1 || true
          if wait_stopped 40; then
            echo "$(ts) cycle: clean stop OK"
          else
            log "cycle: clean stop FAILED — killing wedged QEMU for $VMID"
            echo "$(ts) cycle: clean stop failed, killing wedged QEMU"
            pid="$(cat "/var/run/qemu-server/$VMID.pid" 2>/dev/null || true)"
            if [[ "$pid" =~ ^[0-9]+$ ]]; then
              kill -9 "$pid" 2>/dev/null || true
            else
              # No usable pidfile: match on the command line instead. The
              # pattern is anchored on both sides so "-id 102" cannot also
              # match another VM such as "-id 1020".
              pkill -9 -f -- "(^| )-id $VMID( |\$)" 2>/dev/null || true
            fi
            sleep 3
            qm unlock "$VMID" 2>/dev/null || true
          fi
          qm start "$VMID"
          ;;
      esac
      log "verb=$verb COMPLETE"
      ;;
  esac
}

run_verb "$verb"

View file

@ -0,0 +1,361 @@
# claude-breakglass — in-cluster emergency-recovery UI for the devvm.
#
# A SEPARATE deployment from claude-agent-service (own namespace, own
# ServiceAccount, NO Vault K8s-auth role) that runs ONLY the breakglass agent.
# It shares the claude-agent-service image but overrides the command with the
# breakglass entrypoint. The untrusted-input agents (recruiter-triage,
# nextcloud-todos) never share this process or these credentials.
# See claude-agent-service/docs/adr/0001-breakglass-security-architecture.md.
#
# Scope is the WARM case: devvm wedged while the cluster is healthy. The cold,
# cluster-down path is the break-glass SSH on PVE :52222 (docs/runbooks/breakglass-ssh.md)
# plus the server-lifecycle iDRAC CLI — both out of scope here.
# Name of the TLS secret, passed through to the ingress module. It has no
# default, so the caller must supply it.
variable "tls_secret_name" {
  type      = string
  sensitive = true
}
locals {
  # Namespace referenced by the resources in this stack.
  namespace = "claude-breakglass"

  # Same image as claude-agent-service — the breakglass code lives in that repo
  # under app/breakglass/, and the deployment in this file overrides the
  # command.
  image     = "forgejo.viktorbarzin.me/viktor/claude-agent-service"
  image_tag = "latest"

  # Shared by the Deployment, its pod template/selector and the Service.
  labels = {
    app = "claude-breakglass"
  }
}
# --- Namespace ---
# Dedicated namespace for the breakglass stack, labelled with the aux tier.
resource "kubernetes_namespace" "breakglass" {
  metadata {
    name = local.namespace

    labels = {
      tier = local.tiers.aux
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks/vpa-mode label stamping (harmless if absent)
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# ServiceAccount the breakglass pod runs as, in the stack's own namespace.
resource "kubernetes_service_account" "breakglass" {
  metadata {
    name      = "claude-breakglass"
    namespace = kubernetes_namespace.breakglass.metadata[0].name
  }
}
# --- Secrets (synced by ESO; the pod itself has NO Vault access) ---
# SSH private key (devvm sudo + PVE forced-command). Mounted as a file the
# entrypoint loads into ssh-agent. It lives on the dedicated path
# secret/claude-breakglass/* — the claude-agent namespace's terraform-state
# Vault policy is explicitly DENIED this path (see stacks/vault/main.tf) so the
# shared, prompt-injectable pod can never read it.
resource "kubernetes_manifest" "external_secret_ssh" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      name      = "breakglass-ssh"
      namespace = local.namespace
    }

    spec = {
      refreshInterval = "1h"

      secretStoreRef = {
        name = "vault-kv"
        kind = "ClusterSecretStore"
      }

      # Name of the Kubernetes Secret this ExternalSecret targets.
      target = {
        name = "breakglass-ssh"
      }

      data = [
        {
          secretKey = "private_key"
          remoteRef = {
            key      = "claude-breakglass/ssh_key"
            property = "private_key"
          }
        },
      ]
    }
  }

  # local.namespace is a plain string, so the namespace dependency is explicit.
  depends_on = [kubernetes_namespace.breakglass]
}
# Env secrets: the Anthropic OAuth token (shared with claude-agent-service —
# same account) and the app bearer token (in-cluster/CLI fallback caller auth).
#
# NOTE(review): the bearer token is read from the Vault key `claude-breakglass`
# itself, not from a path beneath it. A Vault policy deny written as
# `secret/data/claude-breakglass/*` only covers sub-paths, so confirm that the
# shared claude-agent policy cannot read this token either.
resource "kubernetes_manifest" "external_secret_env" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      name      = "breakglass-env"
      namespace = local.namespace
    }

    spec = {
      refreshInterval = "1h"

      secretStoreRef = {
        name = "vault-kv"
        kind = "ClusterSecretStore"
      }

      # Name of the Kubernetes Secret this ExternalSecret targets; the
      # deployment loads it via env_from.
      target = {
        name = "breakglass-env"
      }

      data = [
        {
          secretKey = "CLAUDE_CODE_OAUTH_TOKEN"
          remoteRef = {
            key      = "claude-agent-service"
            property = "claude_oauth_token"
          }
        },
        {
          secretKey = "API_BEARER_TOKEN"
          remoteRef = {
            key      = "claude-breakglass"
            property = "api_bearer_token"
          }
        },
      ]
    }
  }

  # local.namespace is a plain string, so the namespace dependency is explicit.
  depends_on = [kubernetes_namespace.breakglass]
}
# --- Deployment ---
# Single-replica deployment of the breakglass agent. It reuses the
# claude-agent-service image but starts the breakglass entrypoint instead of
# the image's default command.
resource "kubernetes_deployment" "breakglass" {
  metadata {
    name      = "claude-breakglass"
    namespace = kubernetes_namespace.breakglass.metadata[0].name
    labels    = local.labels
  }

  spec {
    replicas = 1

    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = local.labels
    }

    template {
      metadata {
        labels = local.labels
      }

      spec {
        service_account_name = kubernetes_service_account.breakglass.metadata[0].name

        image_pull_secrets {
          name = "registry-credentials"
        }

        # Survive the very pressure event the breakglass exists to fix: high
        # priority (resist eviction), tolerate node pressure, and prefer NOT to
        # land on the contended GPU node1. Pull policy is Always: nodes already
        # cache the OLD claude-agent-service:latest (no breakglass entrypoint),
        # so IfNotPresent would run stale code. A registry-down-on-restart is
        # the cluster-down (cold) case, which this UI doesn't cover anyway.
        priority_class_name = "tier-0-core"

        # Stay schedulable on nodes reporting memory or disk pressure.
        toleration {
          key      = "node.kubernetes.io/memory-pressure"
          operator = "Exists"
          effect   = "NoSchedule"
        }

        toleration {
          key      = "node.kubernetes.io/disk-pressure"
          operator = "Exists"
          effect   = "NoSchedule"
        }

        # Tolerate a not-ready / unreachable node for 300 seconds.
        toleration {
          key                = "node.kubernetes.io/not-ready"
          operator           = "Exists"
          effect             = "NoExecute"
          toleration_seconds = 300
        }

        toleration {
          key                = "node.kubernetes.io/unreachable"
          operator           = "Exists"
          effect             = "NoExecute"
          toleration_seconds = 300
        }

        # Soft preference only: k8s-node1 is avoided when another node fits.
        affinity {
          node_affinity {
            preferred_during_scheduling_ignored_during_execution {
              weight = 100

              preference {
                match_expressions {
                  key      = "kubernetes.io/hostname"
                  operator = "NotIn"
                  values   = ["k8s-node1"]
                }
              }
            }
          }
        }

        security_context {
          run_as_user  = 1000
          run_as_group = 1000
          fs_group     = 1000
        }

        # Seed the breakglass agent into the fresh ~/.claude emptyDir and make
        # the session dir writable by uid 1000. Runs as root so it can chown.
        init_container {
          name              = "seed-agent"
          image             = "${local.image}:${local.image_tag}"
          image_pull_policy = "Always"

          command = ["sh", "-c", <<-EOT
            set -e
            mkdir -p /home/agent/.claude/agents /workspace/sessions
            cp /usr/share/agent-seed/breakglass.md /home/agent/.claude/agents/breakglass.md
            chown -R 1000:1000 /home/agent/.claude /workspace
          EOT
          ]

          security_context {
            run_as_user = 0
          }

          volume_mount {
            name       = "claude-home"
            mount_path = "/home/agent/.claude"
          }

          volume_mount {
            name       = "sessions"
            mount_path = "/workspace"
          }

          resources {
            requests = {
              memory = "32Mi"
            }
            limits = {
              memory = "64Mi"
            }
          }
        }

        container {
          name              = "claude-breakglass"
          image             = "${local.image}:${local.image_tag}"
          image_pull_policy = "Always"

          # Override the image's default CMD (the claude-agent-service uvicorn)
          # with the breakglass entrypoint: ssh-agent bootstrap + ssh aliases,
          # then uvicorn app.breakglass.server:app.
          command = ["/srv/docker-entrypoint-breakglass.sh"]

          port {
            container_port = 8080
          }

          # OAuth token (claude -p) + app bearer token.
          env_from {
            secret_ref {
              name = "breakglass-env"
            }
          }

          env {
            name  = "BREAKGLASS_KEY_PATH"
            value = "/secrets/breakglass/private_key"
          }

          env {
            name  = "BREAKGLASS_SESSIONS_DIR"
            value = "/workspace/sessions"
          }

          env {
            name  = "HOME"
            value = "/home/agent"
          }

          liveness_probe {
            http_get {
              path = "/health"
              port = 8080
            }
            initial_delay_seconds = 10
            period_seconds        = 30
          }

          readiness_probe {
            http_get {
              path = "/health"
              port = 8080
            }
            initial_delay_seconds = 5
            period_seconds        = 10
          }

          volume_mount {
            name       = "claude-home"
            mount_path = "/home/agent/.claude"
          }

          volume_mount {
            name       = "sessions"
            mount_path = "/workspace"
          }

          volume_mount {
            name       = "breakglass-ssh"
            mount_path = "/secrets/breakglass"
            read_only  = true
          }

          resources {
            requests = {
              cpu    = "200m"
              memory = "512Mi"
            }
            limits = {
              memory = "4Gi"
            }
          }
        }

        volume {
          name = "claude-home"
          empty_dir {}
        }

        volume {
          name = "sessions"
          empty_dir {}
        }

        volume {
          name = "breakglass-ssh"

          secret {
            secret_name = "breakglass-ssh"
            # 0440 + fsGroup 1000 makes the key readable by uid 1000; the
            # entrypoint copies it to a 0600 tmpfs file before ssh-add (which
            # rejects group-readable keys).
            default_mode = "0440"
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }

  # Create the ExternalSecrets before the deployment that consumes them.
  depends_on = [
    kubernetes_manifest.external_secret_ssh,
    kubernetes_manifest.external_secret_env,
  ]
}
# --- Service ---
# ClusterIP service exposing port 8080 of the pods selected by local.labels.
resource "kubernetes_service" "breakglass" {
  metadata {
    name      = "claude-breakglass"
    namespace = kubernetes_namespace.breakglass.metadata[0].name
    labels    = local.labels
  }

  spec {
    type     = "ClusterIP"
    selector = local.labels

    port {
      port        = 8080
      target_port = 8080
    }
  }
}
# --- Ingress: breakglass.viktorbarzin.me ---
# auth = "required": Authentik forward-auth via the resilience proxy, which
# FALLS BACK to HTTP basic-auth when Authentik is down — the whole point, so
# the breakglass stays reachable during an auth-stack outage. CrowdSec +
# rate-limit are attached by default (not excluded). The app additionally
# accepts the injected X-authentik-username header (or a bearer) as its own
# gate.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "breakglass"
  namespace       = kubernetes_namespace.breakglass.metadata[0].name
  service_name    = kubernetes_service.breakglass.metadata[0].name
  port            = 8080
  tls_secret_name = var.tls_secret_name
  auth            = "required"
  dns_type        = "proxied"

  # gethomepage.dev annotations for this ingress.
  extra_annotations = {
    "gethomepage.dev/enabled"     = "true"
    "gethomepage.dev/name"        = "devvm breakglass"
    "gethomepage.dev/description" = "Emergency recovery UI for the devvm"
    "gethomepage.dev/icon"        = "proxmox.png"
    "gethomepage.dev/group"       = "Infrastructure"
  }
}

View file

@ -0,0 +1,20 @@
# Pull in the root configuration found in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Ordering-only dependencies (outputs are skipped): Platform (Traefik/ingress
# middlewares), Vault (ESO reads secrets), and external-secrets (the
# ClusterSecretStore) must exist first.
dependency "platform" {
  config_path  = "../platform"
  skip_outputs = true
}

dependency "vault" {
  config_path  = "../vault"
  skip_outputs = true
}

dependency "external-secrets" {
  config_path  = "../external-secrets"
  skip_outputs = true
}

View file

@ -851,6 +851,10 @@ resource "kubernetes_deployment" "auth_proxy" {
# nginx only reads its config at startup — roll the pods whenever
# the ConfigMap content changes.
"checksum/auth-proxy-config" = sha1(kubernetes_config_map.auth_proxy_config.data["default.conf"])
# The emergency-fallback htpasswd is a subPath secret mount, which
# does NOT auto-update on change — roll the pods when it rotates so a
# regenerated emergency password actually takes effect.
"checksum/auth-proxy-htpasswd" = sha1(var.auth_fallback_htpasswd)
}
}
spec {

View file

@ -598,6 +598,19 @@ resource "vault_policy" "terraform_state" {
path "secret/metadata/vault" {
capabilities = ["deny"]
}
# Explicit deny on the breakglass SSH key (added with the claude-breakglass
# stack, 2026-06-12). That key grants root-on-devvm + PVE VM-102 power
# verbs; it must NOT be readable by the shared claude-agent pod, whose
# agents (recruiter-triage, nextcloud-todos-exec) ingest untrusted input
# with Bash. The breakglass pod runs in its own namespace with NO Vault
# role and gets the key via ESO only. See
# claude-agent-service/docs/adr/0001-breakglass-security-architecture.md.
path "secret/data/claude-breakglass/*" {
capabilities = ["deny"]
}
path "secret/metadata/claude-breakglass/*" {
capabilities = ["deny"]
}
EOT
}