## Context
The claude-agent-service K8s pod (deployed 2026-04-15) provides an HTTP API
for running Claude headless agents. Three workflows still SSH'd to the DevVM
(10.0.10.10) to invoke `claude -p`. This eliminates that dependency.
## This change
Pipeline migrations (SSH → HTTP POST to claude-agent-service):
- `.woodpecker/issue-automation.yml` — Vault auth fetches API token instead
of SSH key; curl POST /execute + poll /jobs/{id} replaces SSH invocation
- `scripts/postmortem-pipeline.sh` — same pattern; uses jq for safe JSON
construction of TODO payloads
- `.woodpecker/postmortem-todos.yml` — drop openssh-client from apk install
- `stacks/n8n/workflows/diun-upgrade.json` — SSH node replaced with HTTP
Request node; API token via $env.CLAUDE_AGENT_API_TOKEN (added to Vault
secret/n8n)
Documentation updates:
- `docs/architecture/incident-response.md` — Mermaid diagram: DevVM → K8s
- `docs/architecture/automated-upgrades.md` — pipeline diagram + n8n action
- `AGENTS.md` — pipeline description updated
## What is NOT in this change
- DevVM decommissioning (still hosts terminal/foolery services)
- Removal of SSH key secrets from Vault (kept for rollback)
- n8n workflow import (must be done manually in n8n UI)
[ci skip]
Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>
81 lines
2.7 KiB
Bash
Executable file
81 lines
2.7 KiB
Bash
Executable file
#!/bin/sh
# postmortem-pipeline.sh — Woodpecker pipeline step for post-mortem TODO automation
# Called from .woodpecker/postmortem-todos.yml
#
# Must be run from the repository root: all paths below are relative.

# -e: abort on any unhandled command failure.
# -u: treat expansion of an unset variable as an error (catches typos).
set -eu

# 1. Find post-mortem(s) with TODO items
# Scan all post-mortems — don't rely on git diff (Woodpecker shallow clone breaks HEAD~1)
PM_FILE=""
for f in docs/post-mortems/*.md; do
  # When the glob matches nothing, POSIX sh leaves the pattern as a literal
  # string; skip anything that is not an existing regular file.
  [ -f "$f" ] || continue
  # -F: match the table cell text literally. grep errors stay suppressed so
  # an unreadable file is simply treated as "no TODOs" (best effort).
  if grep -qF -- '| TODO |' "$f" 2>/dev/null; then
    PM_FILE="$f"
    break # only the first matching post-mortem is handled per run
  fi
done

if [ -z "$PM_FILE" ]; then
  echo "No post-mortem with pending TODOs found"
  exit 0
fi
echo "Post-mortem with TODOs: $PM_FILE"

# 3. Parse TODOs
# The parser's stdout is stored at /tmp/todos.json; the submit step later in
# this script reads it back from that same path.
sh scripts/parse-postmortem-todos.sh "$PM_FILE" > /tmp/todos.json
cat /tmp/todos.json # echo the parsed result into the CI log

# .safe_todos is compared numerically below, so it must be a plain
# non-negative integer. A missing key (jq prints "null") or any other
# non-numeric value would make `[ ... -eq 0 ]` error out inside the `if`,
# which `set -e` ignores — the script would then carry on and submit a job.
# Fail loudly instead.
TODO_COUNT=$(jq -r '.safe_todos' /tmp/todos.json)
case "$TODO_COUNT" in
  '' | *[!0-9]*)
    echo "ERROR: .safe_todos in /tmp/todos.json is not a non-negative integer (got: $TODO_COUNT)" >&2
    exit 1
    ;;
esac

echo "$TODO_COUNT safe TODO(s) found"
if [ "$TODO_COUNT" -eq 0 ]; then
  echo "No auto-implementable TODOs — skipping"
  exit 0
fi

# 4. Authenticate to Vault via K8s SA JWT
# The pod's service-account token is posted to Vault's auth/kubernetes/login
# endpoint with role "ci"; the response carries the client token in
# .auth.client_token.
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)

# Build the request body with jq so the token is always correctly JSON-escaped.
LOGIN_BODY=$(jq -cn --arg jwt "$SA_TOKEN" '{role: "ci", jwt: $jwt}')

# `|| VAULT_RESP=""` stops `set -e` from aborting silently when curl fails
# (-f turns HTTP errors into a non-zero exit), so the explicit error message
# below is actually reached.
VAULT_RESP=$(curl -sf -X POST http://vault-active.vault.svc.cluster.local:8200/v1/auth/kubernetes/login \
  -d "$LOGIN_BODY") || VAULT_RESP=""

# jq prints nothing for empty input and "null" when the field is absent.
VAULT_TOKEN=$(printf '%s' "$VAULT_RESP" | jq -r .auth.client_token)
if [ -z "$VAULT_TOKEN" ] || [ "$VAULT_TOKEN" = "null" ]; then
  echo "ERROR: Vault authentication failed" >&2
  exit 1
fi
echo "Vault authenticated"

# 5. Fetch API token for claude-agent-service
# Reads the secret at secret/data/claude-agent-service with the Vault client
# token and extracts .data.data.api_bearer_token from the response.
AGENT_SECRET_URL="http://vault-active.vault.svc.cluster.local:8200/v1/secret/data/claude-agent-service"
AGENT_TOKEN=$(
  curl -sf -H "X-Vault-Token: $VAULT_TOKEN" "$AGENT_SECRET_URL" \
    | jq -r '.data.data.api_bearer_token'
)

# Empty output (curl produced nothing) and "null" (field absent) both mean
# no usable token was obtained.
case "$AGENT_TOKEN" in
  '' | null)
    echo "ERROR: Failed to fetch agent API token"
    exit 1
    ;;
esac
echo "Agent token fetched"

# 6. Submit to claude-agent-service
# The parsed TODO JSON is embedded verbatim in the prompt. jq --arg handles
# all JSON escaping, so quotes/newlines in the TODO text cannot break the
# request body.
TODOS=$(cat /tmp/todos.json)
PAYLOAD=$(jq -n \
  --arg prompt "Implement the auto-implementable TODOs from $PM_FILE. Parsed TODO list: $TODOS" \
  --arg agent ".claude/agents/postmortem-todo-resolver" \
  '{prompt: $prompt, agent: $agent, max_budget_usd: 5, timeout_seconds: 900}')

# Report a failed submission explicitly; without the guard `set -e` would
# end the script here with no explanation in the log.
RESP=$(curl -sf -X POST \
  -H "Authorization: Bearer $AGENT_TOKEN" \
  -H "Content-Type: application/json" \
  -d "$PAYLOAD" \
  http://claude-agent-service.claude-agent.svc.cluster.local:8080/execute) || {
  echo "ERROR: Job submission to claude-agent-service failed" >&2
  exit 1
}

# A response without .job_id makes jq print "null"; polling /jobs/null would
# only fail later with a less useful error, so stop here.
JOB_ID=$(printf '%s' "$RESP" | jq -r '.job_id')
if [ -z "$JOB_ID" ] || [ "$JOB_ID" = "null" ]; then
  echo "ERROR: claude-agent-service response did not contain a job_id" >&2
  exit 1
fi
echo "Job submitted: $JOB_ID"

# 7. Poll for completion (15min max)
# 60 polls, 15 s apart = 900 s, the same value sent as timeout_seconds in the
# submit payload. Exit 0 only when the job reports "completed"; any other
# non-"running" status is treated as failure.
JOB_URL="http://claude-agent-service.claude-agent.svc.cluster.local:8080/jobs/$JOB_ID"
POLL_FAILURES=0
for i in $(seq 1 60); do
  sleep 15

  # A single failed poll (network blip, service restart) must not abort the
  # step while the job may still be running; give up only after 5 failures
  # in a row. Testing the assignment in `if` keeps `set -e` from exiting.
  if ! RESULT=$(curl -sf -H "Authorization: Bearer $AGENT_TOKEN" "$JOB_URL"); then
    POLL_FAILURES=$((POLL_FAILURES + 1))
    echo "[$i/60] Status request failed ($POLL_FAILURES in a row)" >&2
    if [ "$POLL_FAILURES" -ge 5 ]; then
      echo "ERROR: Job status endpoint unreachable after $POLL_FAILURES consecutive attempts" >&2
      exit 1
    fi
    continue
  fi
  POLL_FAILURES=0

  STATUS=$(printf '%s' "$RESULT" | jq -r '.status')
  echo "[$i/60] Status: $STATUS"
  if [ "$STATUS" != "running" ]; then
    # Dump the full job record into the CI log before exiting.
    printf '%s\n' "$RESULT" | jq .
    if [ "$STATUS" = "completed" ]; then exit 0; else exit 1; fi
  fi
done

echo "ERROR: Job timed out after 15 minutes" >&2
exit 1