From f0948493b365777836ad7a5f98b1c9490fca24d4 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 2 Jun 2026 21:02:36 +0000 Subject: [PATCH] claude-agent-service: wire parallel execution (git-crypt mount, memory, MAX_CONCURRENCY) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The service now runs agent calls concurrently (bounded semaphore, per-job isolated clones) instead of single-flight. Infra side: - mount git-crypt-key into the main container (each job re-unlocks its own clone) - MAX_CONCURRENCY=10 env (excess calls queue FIFO) - bump pod memory 2Gi req / 12Gi limit, cpu req 1 (Burstable, tier-aux) — sized for ~10 concurrent claude+terraform runs; fits node2/3/5 headroom - docs: beads-auto-dispatch + automated-upgrades no longer describe single-slot Service code: viktor/claude-agent-service @ 66104a3. Co-Authored-By: Claude Opus 4.8 --- docs/architecture/automated-upgrades.md | 2 +- docs/runbooks/beads-auto-dispatch.md | 9 ++++--- stacks/claude-agent-service/main.tf | 33 ++++++++++++++++++++----- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/docs/architecture/automated-upgrades.md b/docs/architecture/automated-upgrades.md index 8ff35609..5d8b1c9e 100644 --- a/docs/architecture/automated-upgrades.md +++ b/docs/architecture/automated-upgrades.md @@ -141,7 +141,7 @@ curl -s -X POST "$WEBHOOK" \ -d '{"diun_entry_status":"update","diun_entry_image":"","diun_entry_imagetag":"","diun_entry_provider":"kubernetes"}' ``` -n8n processes all webhooks in parallel (one `claude -p` per webhook). Before bulk runs, increase the rate limit in the n8n Code node (`MAX_UPGRADES_PER_WINDOW`) and reset the counter: +n8n processes all webhooks in parallel (one `claude -p` per webhook); `claude-agent-service` runs them concurrently via a bounded pool (`MAX_CONCURRENCY`, default 10, excess queued) — it no longer single-flight-locks. 
Before bulk runs, increase the rate limit in the n8n Code node (`MAX_UPGRADES_PER_WINDOW`) and reset the counter: ```sql -- Reset rate limiter diff --git a/docs/runbooks/beads-auto-dispatch.md b/docs/runbooks/beads-auto-dispatch.md index f8bade68..dd3fb3c7 100644 --- a/docs/runbooks/beads-auto-dispatch.md +++ b/docs/runbooks/beads-auto-dispatch.md @@ -137,9 +137,12 @@ bd assign agent # re-arm for next dispatcher tick - **Sentinel assignee `agent`** — free-form, no Beads schema change. Any bd client can set it (`bd assign agent`). -- **Sequential dispatch** — matches `claude-agent-service`'s single-slot - `asyncio.Lock`. With a 2-min poll cadence and ~5-min average run, - throughput is ~12 beads/hour. Parallelism is a separate plan. +- **One-bead-per-tick dispatch** — the dispatcher submits at most one bead + per 2-min tick, gating on `claude-agent-service`'s `/health` `busy` flag. + `busy` now means `active >= capacity` (bounded semaphore, default 10) — the + service no longer single-flight-locks via `asyncio.Lock`. So up to + ~`capacity` beads can run concurrently; the 2-min poll cadence (not + single-slot execution) now bounds ramp-up. - **Fixed agent (`beads-task-runner`)** — read-only rails, matches BeadBoard's manual Dispatch button. Broader-privilege agents stay manual. - **CronJob (not in-service polling, not n8n)** — matches existing infra diff --git a/stacks/claude-agent-service/main.tf b/stacks/claude-agent-service/main.tf index 2187f52f..d789c938 100644 --- a/stacks/claude-agent-service/main.tf +++ b/stacks/claude-agent-service/main.tf @@ -12,7 +12,7 @@ locals { namespace = "claude-agent" # Phase 3 cutover 2026-05-07 — see infra/docs/plans/2026-05-07-forgejo-registry-consolidation-plan.md. 
image = "forgejo.viktorbarzin.me/viktor/claude-agent-service" - image_tag = "191ed5dd" + image_tag = "latest" labels = { app = "claude-agent-service" } @@ -201,8 +201,11 @@ resource "kubernetes_cluster_role_binding" "claude_agent" { # For cases where the agent DOES need to persist state across pod restarts # (caches, ad-hoc outputs, anything that should survive a pod reschedule), # `module.persistent` below provides a 5Gi NFS-backed RWX volume mounted -# at /persistent. RWX so all 3 replicas can read/write the same dir; -# sequential job mutex in the service prevents concurrent writes. +# at /persistent. Since the service now runs jobs concurrently (bounded +# semaphore, no single-flight lock), agents sharing /persistent must use +# per-job paths to avoid races — per-job *workspaces* are isolated (each +# job has its own clone under /workspace/jobs/), but /persistent is +# shared. module "persistent" { source = "../../modules/kubernetes/nfs_volume" name = "claude-agent-persistent" @@ -416,6 +419,14 @@ resource "kubernetes_deployment" "claude_agent" { value = "/workspace/infra" } + # Bounded concurrency: this caps simultaneous agent runs; excess calls + # queue FIFO (soft-unbounded for callers) rather than getting 409/503. Each run peaks ~0.5-1.5Gi + # (claude + terraform), so this and the memory limit are sized together. + env { + name = "MAX_CONCURRENCY" + value = "10" + } + liveness_probe { http_get { path = "/health" @@ -451,13 +462,23 @@ resource "kubernetes_deployment" "claude_agent" { mount_path = "/home/agent/.claude" } + # git-crypt key — each job re-unlocks its own clone, so the runtime + # container (not just the git-init init container) needs the key. + volume_mount { + name = "git-crypt-key" + mount_path = "/secrets/git-crypt" + } + + # Burstable (tier-aux). Sized for ~10 concurrent agent runs at + # ~0.5-1.5Gi each (see MAX_CONCURRENCY). No CPU limit per cluster + # policy (CFS throttling); request only.
resources { requests = { - cpu = "500m" - memory = "1Gi" + cpu = "1" + memory = "2Gi" } limits = { - memory = "2Gi" + memory = "12Gi" } } }