claude-agent-service/tests/test_concurrency.py
Viktor Barzin 66104a32ab
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
parallel execution: replace single-flight lock with bounded semaphore + per-job workspace
Multiple agent calls now run concurrently, each in its own isolated git
checkout (local clone of the warm base, hardlinked objects, git-crypt
re-unlocked), so concurrent jobs never share a working tree.

- execution_lock (asyncio.Lock) -> execution_semaphore (default MAX_CONCURRENCY=10);
  excess calls queue FIFO instead of 409/503. MAX_QUEUE_DEPTH safety valve.
- /execute never returns 409; jobs go queued -> running. Timeout covers
  execution only, not queue wait.
- /v1/chat/completions queues for a slot instead of 503-busy.
- /health: busy = at-capacity, plus active/queued/capacity fields.
- per-job workspace prepare/cleanup under a short git lock; the agent run holds none.
- in-memory job registry evicted past JOB_TTL_SECONDS.

Design: docs/2026-06-02-parallel-execution-design.md

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-02 20:57:41 +00:00

223 lines
9.2 KiB
Python

"""Tests for parallel, independent execution.
These exercise the post-lock behavior: multiple agent calls run concurrently,
each in its own workspace, with a bounded semaphore + FIFO queue instead of a
single-flight lock.
"""
import asyncio
import pytest
from unittest.mock import AsyncMock, patch
from httpx import ASGITransport, AsyncClient
from app import main as app_main
from app.main import app
@pytest.fixture
def auth_header():
return {"Authorization": "Bearer test-token"}
class _BlockingStdout:
"""async-iterable stdout that blocks on first read until `release` is set,
then ends with no output — mimics a long-running `claude -p`."""
def __init__(self, release: asyncio.Event):
self._release = release
self._done = False
def __aiter__(self):
return self
async def __anext__(self):
if self._done:
raise StopAsyncIteration
await self._release.wait()
self._done = True
raise StopAsyncIteration
class ConcurrencyProbe:
"""Tracks how many mock subprocesses have started, and gates their exit."""
def __init__(self):
self.started = 0
self.release = asyncio.Event()
def factory(self):
async def make(*args, **kwargs):
self.started += 1
mock = AsyncMock()
mock.stdout = _BlockingStdout(self.release)
mock.stderr = AsyncMock()
mock.stderr.read = AsyncMock(return_value=b"")
mock.wait = AsyncMock(return_value=0)
mock.returncode = 0
return mock
return make
async def wait_started(self, n: int, timeout: float = 2.0):
deadline = asyncio.get_event_loop().time() + timeout
while self.started < n:
if asyncio.get_event_loop().time() > deadline:
break
await asyncio.sleep(0.01)
def _patch_workspace():
"""Patch the per-job workspace seams so no real git runs."""
return (
patch("app.main.prepare_workspace", new=AsyncMock(return_value="/tmp/ws")),
patch("app.main.cleanup_workspace", new=AsyncMock()),
)
@pytest.mark.asyncio
async def test_execute_does_not_return_409_when_a_job_is_running(auth_header, drain):
"""A second /execute must NOT be rejected with 409 while one is in flight."""
probe = ConcurrencyProbe()
pw, cw = _patch_workspace()
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
await probe.wait_started(1)
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
probe.release.set()
await drain()
assert r1.status_code == 202
assert r2.status_code == 202
@pytest.mark.asyncio
async def test_two_execute_jobs_run_concurrently(auth_header, drain):
"""Two /execute jobs run their subprocesses at the same time (not serialized)."""
probe = ConcurrencyProbe()
pw, cw = _patch_workspace()
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
await probe.wait_started(2)
both_running = probe.started >= 2
probe.release.set()
await drain()
assert both_running, "both jobs should have started before either finished"
@pytest.mark.asyncio
async def test_safety_queue_blocks_beyond_capacity(auth_header, drain):
"""With capacity=1, the 2nd job is accepted but stays queued until a slot frees."""
app_main.execution_semaphore = asyncio.Semaphore(1)
probe = ConcurrencyProbe()
pw, cw = _patch_workspace()
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
await probe.wait_started(1)
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
# Give the 2nd task a chance to (not) start — capacity is 1.
await asyncio.sleep(0.05)
only_one_started = probe.started == 1
job2 = (await client.get(f"/jobs/{r2.json()['job_id']}", headers=auth_header)).json()
probe.release.set()
await drain()
assert r1.status_code == 202
assert r2.status_code == 202
assert only_one_started, "2nd job must wait while capacity is full"
assert job2["status"] == "queued"
@pytest.mark.asyncio
async def test_two_chat_completions_run_concurrently(auth_header):
"""Concurrent /v1/chat/completions both run — no 503 busy."""
probe = ConcurrencyProbe()
pw, cw = _patch_workspace()
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
payload = {"model": "haiku", "messages": [{"role": "user", "content": "hi"}]}
t1 = asyncio.create_task(client.post("/v1/chat/completions", json=payload, headers=auth_header))
t2 = asyncio.create_task(client.post("/v1/chat/completions", json=payload, headers=auth_header))
await probe.wait_started(2)
both_running = probe.started >= 2
probe.release.set()
r1, r2 = await asyncio.gather(t1, t2)
assert both_running, "both chat calls should run concurrently"
assert r1.status_code == 200
assert r2.status_code == 200
@pytest.mark.asyncio
async def test_health_reports_capacity_fields():
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
body = (await client.get("/health")).json()
assert body["status"] == "ok"
assert body["busy"] is False
assert body["active"] == 0
assert body["queued"] == 0
assert body["capacity"] == app_main.MAX_CONCURRENCY
@pytest.mark.asyncio
async def test_each_job_gets_distinct_workspace(auth_header, drain):
"""prepare_workspace is called per job with the job id, yielding distinct cwds."""
seen_job_ids = []
async def fake_prepare(job_id):
seen_job_ids.append(job_id)
return f"/tmp/ws/{job_id}"
probe = ConcurrencyProbe()
with patch("app.main.prepare_workspace", side_effect=fake_prepare), \
patch("app.main.cleanup_workspace", new=AsyncMock()), \
patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
await probe.wait_started(2)
probe.release.set()
await drain()
assert len(set(seen_job_ids)) == 2, "each job should prepare its own workspace"
@pytest.mark.asyncio
async def test_queue_depth_rejection(auth_header, drain):
"""Beyond MAX_QUEUE_DEPTH, /execute is rejected with 429."""
app_main.execution_semaphore = asyncio.Semaphore(1)
app_main.MAX_QUEUE_DEPTH = 2
probe = ConcurrencyProbe()
pw, cw = _patch_workspace()
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
await probe.wait_started(1)
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
r3 = await client.post("/execute", json={"prompt": "c", "agent": "z"}, headers=auth_header)
probe.release.set()
await drain()
assert r1.status_code == 202 # active
assert r2.status_code == 202 # queued
assert r3.status_code == 429 # over depth
def test_evict_old_jobs_drops_finished_past_ttl():
"""Completed jobs older than JOB_TTL are evicted; running/queued are kept."""
import time
app_main.jobs.clear()
now = time.time()
app_main.jobs["old"] = {"status": "completed", "finished_epoch": now - 99999}
app_main.jobs["fresh"] = {"status": "completed", "finished_epoch": now}
app_main.jobs["running"] = {"status": "running"}
app_main.jobs["queued"] = {"status": "queued"}
app_main._evict_old_jobs()
assert "old" not in app_main.jobs
assert "fresh" in app_main.jobs
assert "running" in app_main.jobs
assert "queued" in app_main.jobs