All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Multiple agent calls now run concurrently, each in its own isolated git checkout (local clone of the warm base, hardlinked objects, git-crypt re-unlocked), so concurrent jobs never share a working tree. - execution_lock (asyncio.Lock) -> execution_semaphore (default MAX_CONCURRENCY=10); excess calls queue FIFO instead of 409/503. MAX_QUEUE_DEPTH safety valve. - /execute never returns 409; jobs go queued -> running. Timeout covers execution only, not queue wait. - /v1/chat/completions queues for a slot instead of 503-busy. - /health: busy = at-capacity, plus active/queued/capacity fields. - per-job workspace prepare/cleanup under a short git lock; the agent run holds none. - in-memory job registry evicted past JOB_TTL_SECONDS. Design: docs/2026-06-02-parallel-execution-design.md Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
223 lines
9.2 KiB
Python
223 lines
9.2 KiB
Python
"""Tests for parallel, independent execution.
|
|
|
|
These exercise the post-lock behavior: multiple agent calls run concurrently,
|
|
each in its own workspace, with a bounded semaphore + FIFO queue instead of a
|
|
single-flight lock.
|
|
"""
|
|
import asyncio
|
|
|
|
import pytest
|
|
from unittest.mock import AsyncMock, patch
|
|
from httpx import ASGITransport, AsyncClient
|
|
|
|
from app import main as app_main
|
|
from app.main import app
|
|
|
|
|
|
@pytest.fixture
|
|
def auth_header():
|
|
return {"Authorization": "Bearer test-token"}
|
|
|
|
|
|
class _BlockingStdout:
|
|
"""async-iterable stdout that blocks on first read until `release` is set,
|
|
then ends with no output — mimics a long-running `claude -p`."""
|
|
|
|
def __init__(self, release: asyncio.Event):
|
|
self._release = release
|
|
self._done = False
|
|
|
|
def __aiter__(self):
|
|
return self
|
|
|
|
async def __anext__(self):
|
|
if self._done:
|
|
raise StopAsyncIteration
|
|
await self._release.wait()
|
|
self._done = True
|
|
raise StopAsyncIteration
|
|
|
|
|
|
class ConcurrencyProbe:
|
|
"""Tracks how many mock subprocesses have started, and gates their exit."""
|
|
|
|
def __init__(self):
|
|
self.started = 0
|
|
self.release = asyncio.Event()
|
|
|
|
def factory(self):
|
|
async def make(*args, **kwargs):
|
|
self.started += 1
|
|
mock = AsyncMock()
|
|
mock.stdout = _BlockingStdout(self.release)
|
|
mock.stderr = AsyncMock()
|
|
mock.stderr.read = AsyncMock(return_value=b"")
|
|
mock.wait = AsyncMock(return_value=0)
|
|
mock.returncode = 0
|
|
return mock
|
|
return make
|
|
|
|
async def wait_started(self, n: int, timeout: float = 2.0):
|
|
deadline = asyncio.get_event_loop().time() + timeout
|
|
while self.started < n:
|
|
if asyncio.get_event_loop().time() > deadline:
|
|
break
|
|
await asyncio.sleep(0.01)
|
|
|
|
|
|
def _patch_workspace():
|
|
"""Patch the per-job workspace seams so no real git runs."""
|
|
return (
|
|
patch("app.main.prepare_workspace", new=AsyncMock(return_value="/tmp/ws")),
|
|
patch("app.main.cleanup_workspace", new=AsyncMock()),
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_does_not_return_409_when_a_job_is_running(auth_header, drain):
|
|
"""A second /execute must NOT be rejected with 409 while one is in flight."""
|
|
probe = ConcurrencyProbe()
|
|
pw, cw = _patch_workspace()
|
|
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
|
await probe.wait_started(1)
|
|
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
|
probe.release.set()
|
|
await drain()
|
|
assert r1.status_code == 202
|
|
assert r2.status_code == 202
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_two_execute_jobs_run_concurrently(auth_header, drain):
|
|
"""Two /execute jobs run their subprocesses at the same time (not serialized)."""
|
|
probe = ConcurrencyProbe()
|
|
pw, cw = _patch_workspace()
|
|
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
|
await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
|
await probe.wait_started(2)
|
|
both_running = probe.started >= 2
|
|
probe.release.set()
|
|
await drain()
|
|
assert both_running, "both jobs should have started before either finished"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_safety_queue_blocks_beyond_capacity(auth_header, drain):
|
|
"""With capacity=1, the 2nd job is accepted but stays queued until a slot frees."""
|
|
app_main.execution_semaphore = asyncio.Semaphore(1)
|
|
probe = ConcurrencyProbe()
|
|
pw, cw = _patch_workspace()
|
|
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
|
await probe.wait_started(1)
|
|
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
|
# Give the 2nd task a chance to (not) start — capacity is 1.
|
|
await asyncio.sleep(0.05)
|
|
only_one_started = probe.started == 1
|
|
job2 = (await client.get(f"/jobs/{r2.json()['job_id']}", headers=auth_header)).json()
|
|
probe.release.set()
|
|
await drain()
|
|
assert r1.status_code == 202
|
|
assert r2.status_code == 202
|
|
assert only_one_started, "2nd job must wait while capacity is full"
|
|
assert job2["status"] == "queued"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_two_chat_completions_run_concurrently(auth_header):
|
|
"""Concurrent /v1/chat/completions both run — no 503 busy."""
|
|
probe = ConcurrencyProbe()
|
|
pw, cw = _patch_workspace()
|
|
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
payload = {"model": "haiku", "messages": [{"role": "user", "content": "hi"}]}
|
|
t1 = asyncio.create_task(client.post("/v1/chat/completions", json=payload, headers=auth_header))
|
|
t2 = asyncio.create_task(client.post("/v1/chat/completions", json=payload, headers=auth_header))
|
|
await probe.wait_started(2)
|
|
both_running = probe.started >= 2
|
|
probe.release.set()
|
|
r1, r2 = await asyncio.gather(t1, t2)
|
|
assert both_running, "both chat calls should run concurrently"
|
|
assert r1.status_code == 200
|
|
assert r2.status_code == 200
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_health_reports_capacity_fields():
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
body = (await client.get("/health")).json()
|
|
assert body["status"] == "ok"
|
|
assert body["busy"] is False
|
|
assert body["active"] == 0
|
|
assert body["queued"] == 0
|
|
assert body["capacity"] == app_main.MAX_CONCURRENCY
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_each_job_gets_distinct_workspace(auth_header, drain):
|
|
"""prepare_workspace is called per job with the job id, yielding distinct cwds."""
|
|
seen_job_ids = []
|
|
|
|
async def fake_prepare(job_id):
|
|
seen_job_ids.append(job_id)
|
|
return f"/tmp/ws/{job_id}"
|
|
|
|
probe = ConcurrencyProbe()
|
|
with patch("app.main.prepare_workspace", side_effect=fake_prepare), \
|
|
patch("app.main.cleanup_workspace", new=AsyncMock()), \
|
|
patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
|
await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
|
await probe.wait_started(2)
|
|
probe.release.set()
|
|
await drain()
|
|
assert len(set(seen_job_ids)) == 2, "each job should prepare its own workspace"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_queue_depth_rejection(auth_header, drain):
|
|
"""Beyond MAX_QUEUE_DEPTH, /execute is rejected with 429."""
|
|
app_main.execution_semaphore = asyncio.Semaphore(1)
|
|
app_main.MAX_QUEUE_DEPTH = 2
|
|
probe = ConcurrencyProbe()
|
|
pw, cw = _patch_workspace()
|
|
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
|
transport = ASGITransport(app=app)
|
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
|
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
|
await probe.wait_started(1)
|
|
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
|
r3 = await client.post("/execute", json={"prompt": "c", "agent": "z"}, headers=auth_header)
|
|
probe.release.set()
|
|
await drain()
|
|
assert r1.status_code == 202 # active
|
|
assert r2.status_code == 202 # queued
|
|
assert r3.status_code == 429 # over depth
|
|
|
|
|
|
def test_evict_old_jobs_drops_finished_past_ttl():
|
|
"""Completed jobs older than JOB_TTL are evicted; running/queued are kept."""
|
|
import time
|
|
app_main.jobs.clear()
|
|
now = time.time()
|
|
app_main.jobs["old"] = {"status": "completed", "finished_epoch": now - 99999}
|
|
app_main.jobs["fresh"] = {"status": "completed", "finished_epoch": now}
|
|
app_main.jobs["running"] = {"status": "running"}
|
|
app_main.jobs["queued"] = {"status": "queued"}
|
|
app_main._evict_old_jobs()
|
|
assert "old" not in app_main.jobs
|
|
assert "fresh" in app_main.jobs
|
|
assert "running" in app_main.jobs
|
|
assert "queued" in app_main.jobs
|