From 99180bec42dbbfcb7f5a37b4445995607abf8b6b Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sat, 18 Apr 2026 10:41:09 +0000
Subject: [PATCH] =?UTF-8?q?[n8n]=20Fix=20broken=20DIUN=20auto-upgrade=20pi?=
 =?UTF-8?q?peline=20=E2=80=94=20missing=20auth=20token=20to=20claude-agent?=
 =?UTF-8?q?-service?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Context

DIUN has been detecting image updates and firing Slack + webhook notifications for weeks, but zero automated upgrades ran because the handoff from n8n to claude-agent-service was silently 401-ing.

The pipeline (DIUN → n8n webhook → claude-agent-service /execute → service-upgrade agent) was migrated from DevVM SSH to K8s HTTP in 42f1c3cf. The migration wired `claude-agent-service` (API_BEARER_TOKEN env set) and updated the n8n workflow JSON to POST with `Authorization: Bearer $env.CLAUDE_AGENT_API_TOKEN`, but missed two things on the n8n side:

1. The deployment didn't expose `CLAUDE_AGENT_API_TOKEN` to the n8n container — workflow sent `Authorization: Bearer ` (empty).
2. The workflow header expression used JS concat (`='Bearer ' + $env.X`) which n8n 1.x does NOT evaluate in HTTP Request node header params. It needs n8n's `{{ }}` expression form: `=Bearer {{ $env.X }}`.

Evidence: `claude-agent-service` logs showed only `/health` probes — zero `/execute` calls over 12h despite DIUN firing webhooks. n8n PG execution 2250 returned `401 Missing bearer token`.

## This change

- Adds ExternalSecret `claude-agent-token` in the `n8n` namespace that pulls `api_bearer_token` from Vault `secret/claude-agent-service` (same source as the receiving service's token).
- Wires the token into the n8n container as env var `CLAUDE_AGENT_API_TOKEN` via `secret_key_ref`.
- Sets `N8N_BLOCK_ENV_ACCESS_IN_NODE=false` so expressions CAN read `$env.*` at all (default in 1.x is false already, but setting explicitly guards against upstream default flips).
- Fixes the workflow JSON backup (`workflows/diun-upgrade.json`) header expression to use `{{ $env.X }}` template syntax. The live workflow in n8n's PG DB was also patched in place (one-time `UPDATE workflow_entity SET nodes = REPLACE(...)` — workflows are not TF-managed; they were imported once).

## What is NOT in this change

- No retroactive re-run of skipped DIUN events. They'll be rediscovered in future scans.
- No change to the `claude-agent-service` side — its token and endpoint were already correct.
- No Slack alert on n8n HTTP-node failures — future work; right now a broken workflow fails silently unless you check Execution History.

## End-to-end verification

```
$ curl -X POST n8n.viktorbarzin.me/webhook/30805ab6-... \
    -d '{"diun_entry_status":"update","diun_entry_image":"docker.io/library/httpd","diun_entry_imagetag":"2.4.66",...}'
{"message":"Workflow was started"} HTTP 200

# n8n PG: execution_entity latest row → status=success
# claude-agent-service logs → "POST /execute HTTP/1.1" 202 Accepted
```

## Reproduce locally

```
1. vault login -method=oidc
2. cd stacks/n8n && ../../scripts/tg apply
3. kubectl -n n8n exec deploy/n8n -- printenv CLAUDE_AGENT_API_TOKEN (should print 64-char hex)
4. Fire synthetic webhook with non-critical image (httpd / alpine)
5. 
Check n8n execution is success, claude-agent-service shows 202 ``` Closes: code-ekz Related: code-bck Co-Authored-By: Claude Opus 4.7 (1M context) --- stacks/n8n/main.tf | 42 ++++++++++++++++++++++++++ stacks/n8n/workflows/diun-upgrade.json | 2 +- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/stacks/n8n/main.tf b/stacks/n8n/main.tf index 4e4f8de1..fb6b4309 100644 --- a/stacks/n8n/main.tf +++ b/stacks/n8n/main.tf @@ -47,6 +47,35 @@ resource "kubernetes_manifest" "external_secret" { depends_on = [kubernetes_namespace.n8n] } +resource "kubernetes_manifest" "external_secret_claude_agent" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "claude-agent-token" + namespace = "n8n" + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-kv" + kind = "ClusterSecretStore" + } + target = { + name = "claude-agent-token" + } + data = [{ + secretKey = "api_bearer_token" + remoteRef = { + key = "claude-agent-service" + property = "api_bearer_token" + } + }] + } + } + depends_on = [kubernetes_namespace.n8n] +} + resource "kubernetes_persistent_volume_claim" "data_encrypted" { wait_until_bound = false metadata { @@ -207,6 +236,19 @@ resource "kubernetes_deployment" "n8n" { name = "WEBHOOK_URL" value = "https://n8n.viktorbarzin.me" } + env { + name = "CLAUDE_AGENT_API_TOKEN" + value_from { + secret_key_ref { + name = "claude-agent-token" + key = "api_bearer_token" + } + } + } + env { + name = "N8N_BLOCK_ENV_ACCESS_IN_NODE" + value = "false" + } volume_mount { name = "data" mount_path = "/home/node/.n8n" diff --git a/stacks/n8n/workflows/diun-upgrade.json b/stacks/n8n/workflows/diun-upgrade.json index fcb10994..3a3e852d 100644 --- a/stacks/n8n/workflows/diun-upgrade.json +++ b/stacks/n8n/workflows/diun-upgrade.json @@ -43,7 +43,7 @@ "sendHeaders": true, "headerParameters": { "parameters": [ - {"name": "Authorization", "value": "='Bearer ' + $env.CLAUDE_AGENT_API_TOKEN"}, + {"name": 
"Authorization", "value": "=Bearer {{ $env.CLAUDE_AGENT_API_TOKEN }}"}, {"name": "Content-Type", "value": "application/json"} ] },