parallel execution: replace single-flight lock with bounded semaphore + per-job workspace
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Multiple agent calls now run concurrently, each in its own isolated git checkout (local clone of the warm base, hardlinked objects, git-crypt re-unlocked), so concurrent jobs never share a working tree. - execution_lock (asyncio.Lock) -> execution_semaphore (default MAX_CONCURRENCY=10); excess calls queue FIFO instead of 409/503. MAX_QUEUE_DEPTH safety valve. - /execute never returns 409; jobs go queued -> running. Timeout covers execution only, not queue wait. - /v1/chat/completions queues for a slot instead of 503-busy. - /health: busy = at-capacity, plus active/queued/capacity fields. - per-job workspace prepare/cleanup under a short git lock; the agent run holds none. - in-memory job registry evicted past JOB_TTL_SECONDS. Design: docs/2026-06-02-parallel-execution-design.md Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
add15325bb
commit
66104a32ab
6 changed files with 637 additions and 140 deletions
223
tests/test_concurrency.py
Normal file
223
tests/test_concurrency.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""Tests for parallel, independent execution.
|
||||
|
||||
These exercise the post-lock behavior: multiple agent calls run concurrently,
|
||||
each in its own workspace, with a bounded semaphore + FIFO queue instead of a
|
||||
single-flight lock.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from app import main as app_main
|
||||
from app.main import app
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def auth_header():
|
||||
return {"Authorization": "Bearer test-token"}
|
||||
|
||||
|
||||
class _BlockingStdout:
|
||||
"""async-iterable stdout that blocks on first read until `release` is set,
|
||||
then ends with no output — mimics a long-running `claude -p`."""
|
||||
|
||||
def __init__(self, release: asyncio.Event):
|
||||
self._release = release
|
||||
self._done = False
|
||||
|
||||
def __aiter__(self):
|
||||
return self
|
||||
|
||||
async def __anext__(self):
|
||||
if self._done:
|
||||
raise StopAsyncIteration
|
||||
await self._release.wait()
|
||||
self._done = True
|
||||
raise StopAsyncIteration
|
||||
|
||||
|
||||
class ConcurrencyProbe:
|
||||
"""Tracks how many mock subprocesses have started, and gates their exit."""
|
||||
|
||||
def __init__(self):
|
||||
self.started = 0
|
||||
self.release = asyncio.Event()
|
||||
|
||||
def factory(self):
|
||||
async def make(*args, **kwargs):
|
||||
self.started += 1
|
||||
mock = AsyncMock()
|
||||
mock.stdout = _BlockingStdout(self.release)
|
||||
mock.stderr = AsyncMock()
|
||||
mock.stderr.read = AsyncMock(return_value=b"")
|
||||
mock.wait = AsyncMock(return_value=0)
|
||||
mock.returncode = 0
|
||||
return mock
|
||||
return make
|
||||
|
||||
async def wait_started(self, n: int, timeout: float = 2.0):
|
||||
deadline = asyncio.get_event_loop().time() + timeout
|
||||
while self.started < n:
|
||||
if asyncio.get_event_loop().time() > deadline:
|
||||
break
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
|
||||
def _patch_workspace():
|
||||
"""Patch the per-job workspace seams so no real git runs."""
|
||||
return (
|
||||
patch("app.main.prepare_workspace", new=AsyncMock(return_value="/tmp/ws")),
|
||||
patch("app.main.cleanup_workspace", new=AsyncMock()),
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_does_not_return_409_when_a_job_is_running(auth_header, drain):
|
||||
"""A second /execute must NOT be rejected with 409 while one is in flight."""
|
||||
probe = ConcurrencyProbe()
|
||||
pw, cw = _patch_workspace()
|
||||
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
||||
await probe.wait_started(1)
|
||||
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
||||
probe.release.set()
|
||||
await drain()
|
||||
assert r1.status_code == 202
|
||||
assert r2.status_code == 202
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_two_execute_jobs_run_concurrently(auth_header, drain):
|
||||
"""Two /execute jobs run their subprocesses at the same time (not serialized)."""
|
||||
probe = ConcurrencyProbe()
|
||||
pw, cw = _patch_workspace()
|
||||
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
||||
await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
||||
await probe.wait_started(2)
|
||||
both_running = probe.started >= 2
|
||||
probe.release.set()
|
||||
await drain()
|
||||
assert both_running, "both jobs should have started before either finished"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safety_queue_blocks_beyond_capacity(auth_header, drain):
|
||||
"""With capacity=1, the 2nd job is accepted but stays queued until a slot frees."""
|
||||
app_main.execution_semaphore = asyncio.Semaphore(1)
|
||||
probe = ConcurrencyProbe()
|
||||
pw, cw = _patch_workspace()
|
||||
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
||||
await probe.wait_started(1)
|
||||
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
||||
# Give the 2nd task a chance to (not) start — capacity is 1.
|
||||
await asyncio.sleep(0.05)
|
||||
only_one_started = probe.started == 1
|
||||
job2 = (await client.get(f"/jobs/{r2.json()['job_id']}", headers=auth_header)).json()
|
||||
probe.release.set()
|
||||
await drain()
|
||||
assert r1.status_code == 202
|
||||
assert r2.status_code == 202
|
||||
assert only_one_started, "2nd job must wait while capacity is full"
|
||||
assert job2["status"] == "queued"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_two_chat_completions_run_concurrently(auth_header):
|
||||
"""Concurrent /v1/chat/completions both run — no 503 busy."""
|
||||
probe = ConcurrencyProbe()
|
||||
pw, cw = _patch_workspace()
|
||||
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
payload = {"model": "haiku", "messages": [{"role": "user", "content": "hi"}]}
|
||||
t1 = asyncio.create_task(client.post("/v1/chat/completions", json=payload, headers=auth_header))
|
||||
t2 = asyncio.create_task(client.post("/v1/chat/completions", json=payload, headers=auth_header))
|
||||
await probe.wait_started(2)
|
||||
both_running = probe.started >= 2
|
||||
probe.release.set()
|
||||
r1, r2 = await asyncio.gather(t1, t2)
|
||||
assert both_running, "both chat calls should run concurrently"
|
||||
assert r1.status_code == 200
|
||||
assert r2.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_health_reports_capacity_fields():
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
body = (await client.get("/health")).json()
|
||||
assert body["status"] == "ok"
|
||||
assert body["busy"] is False
|
||||
assert body["active"] == 0
|
||||
assert body["queued"] == 0
|
||||
assert body["capacity"] == app_main.MAX_CONCURRENCY
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_each_job_gets_distinct_workspace(auth_header, drain):
|
||||
"""prepare_workspace is called per job with the job id, yielding distinct cwds."""
|
||||
seen_job_ids = []
|
||||
|
||||
async def fake_prepare(job_id):
|
||||
seen_job_ids.append(job_id)
|
||||
return f"/tmp/ws/{job_id}"
|
||||
|
||||
probe = ConcurrencyProbe()
|
||||
with patch("app.main.prepare_workspace", side_effect=fake_prepare), \
|
||||
patch("app.main.cleanup_workspace", new=AsyncMock()), \
|
||||
patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
||||
await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
||||
await probe.wait_started(2)
|
||||
probe.release.set()
|
||||
await drain()
|
||||
assert len(set(seen_job_ids)) == 2, "each job should prepare its own workspace"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_queue_depth_rejection(auth_header, drain):
|
||||
"""Beyond MAX_QUEUE_DEPTH, /execute is rejected with 429."""
|
||||
app_main.execution_semaphore = asyncio.Semaphore(1)
|
||||
app_main.MAX_QUEUE_DEPTH = 2
|
||||
probe = ConcurrencyProbe()
|
||||
pw, cw = _patch_workspace()
|
||||
with pw, cw, patch("app.main.asyncio.create_subprocess_exec", side_effect=probe.factory()):
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
r1 = await client.post("/execute", json={"prompt": "a", "agent": "x"}, headers=auth_header)
|
||||
await probe.wait_started(1)
|
||||
r2 = await client.post("/execute", json={"prompt": "b", "agent": "y"}, headers=auth_header)
|
||||
r3 = await client.post("/execute", json={"prompt": "c", "agent": "z"}, headers=auth_header)
|
||||
probe.release.set()
|
||||
await drain()
|
||||
assert r1.status_code == 202 # active
|
||||
assert r2.status_code == 202 # queued
|
||||
assert r3.status_code == 429 # over depth
|
||||
|
||||
|
||||
def test_evict_old_jobs_drops_finished_past_ttl():
|
||||
"""Completed jobs older than JOB_TTL are evicted; running/queued are kept."""
|
||||
import time
|
||||
app_main.jobs.clear()
|
||||
now = time.time()
|
||||
app_main.jobs["old"] = {"status": "completed", "finished_epoch": now - 99999}
|
||||
app_main.jobs["fresh"] = {"status": "completed", "finished_epoch": now}
|
||||
app_main.jobs["running"] = {"status": "running"}
|
||||
app_main.jobs["queued"] = {"status": "queued"}
|
||||
app_main._evict_old_jobs()
|
||||
assert "old" not in app_main.jobs
|
||||
assert "fresh" in app_main.jobs
|
||||
assert "running" in app_main.jobs
|
||||
assert "queued" in app_main.jobs
|
||||
Loading…
Add table
Add a link
Reference in a new issue