# claude-agent-service/tests/conftest.py

import asyncio
import os

# Seed the test environment before ``app`` is imported further down;
# ``setdefault`` leaves any value the caller already exported untouched.
# NOTE(review): presumably ``app.main`` reads these at import time, which is
# why they sit above the import — confirm against app/main.py.
os.environ.setdefault("API_BEARER_TOKEN", "test-token")
os.environ.setdefault("WORKSPACE_DIR", "/tmp/test-workspace")

import pytest

from app import main as app_main


@pytest.fixture(autouse=True)
def _reset_execution_state():
    """Restore ``app.main``'s module-level concurrency state before each test.

    The job registry is emptied, both in-flight counters and the last-fetch
    timestamp are zeroed, and ``MAX_QUEUE_DEPTH`` is re-read from the
    environment (falling back to 100). The semaphore is rebuilt rather than
    reused: pytest-asyncio gives every test function a new event loop, and a
    semaphore carried over from an earlier test fails with "bound to a
    different event loop".
    """
    app_main.jobs.clear()
    app_main.inflight_active = app_main.inflight_queued = 0

    # Sized from the app's own setting so the fixture never drifts from it.
    app_main.execution_semaphore = asyncio.Semaphore(app_main.MAX_CONCURRENCY)
    app_main._last_fetch_epoch = 0.0

    raw_queue_depth = os.environ.get("MAX_QUEUE_DEPTH", "100")
    app_main.MAX_QUEUE_DEPTH = int(raw_queue_depth)

    # Nothing to tear down; the next test's setup resets everything again.
    yield


@pytest.fixture
def drain():
    """Provide an awaitable that waits for background /execute jobs to finish.

    Tests that fire `/execute` must drain before leaving the `patch(...)`
    context — otherwise a background task resumes after the mocks are torn
    down, spawns a real subprocess during loop teardown, and deadlocks the
    asyncio child-watcher.

    Returns:
        An ``async`` callable ``_drain(timeout=3.0)``. It polls
        ``app_main.inflight_active`` and ``app_main.inflight_queued`` every
        10 ms until both are falsy or ``timeout`` seconds have passed on the
        loop clock. Reaching the timeout is best-effort: the call simply
        returns, it does not raise.
    """
    async def _drain(timeout: float = 3.0):
        # Inside a coroutine the running loop is the one we want;
        # get_running_loop() is the documented call for that and never
        # consults the event-loop policy.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while (
            (app_main.inflight_active or app_main.inflight_queued)
            and loop.time() <= deadline
        ):
            await asyncio.sleep(0.01)
    return _drain


# --------------------------------------------------------------------------- #
# AFK loop fixtures.
#
# Shared factories + in-memory fakes for the app.afk modules. EVERYTHING the AFK
# tests touch is faked here — no test ever reaches a real T3 server, GitHub /
# Forgejo, or the cluster. The fakes implement the module interfaces from the
# contract and record their calls so tests can assert on them.
# --------------------------------------------------------------------------- #
from app.afk.types import (  # noqa: E402  (after the env setup above, like app_main)
    CIStatus,
    Config,
    Issue,
    RunState,
    ThreadStatus,
)


@pytest.fixture
def make_issue():
    """Return a builder for ``Issue`` objects.

    With no arguments the builder yields issue #1 in repo ``"infra"``,
    labelled ``ready-for-agent`` by a trusted labeller, with no blockers and
    priority 0. Any field can be overridden per call.
    """
    def _build(
        number: int = 1,
        repo: str = "infra",
        labels: list[str] | None = None,
        blocked_by: list[int] | None = None,
        labeled_by_trusted: bool = True,
        priority: int = 0,
    ) -> Issue:
        # Resolve the None sentinels here so every call gets its own lists.
        if labels is None:
            labels = ["ready-for-agent"]
        if blocked_by is None:
            blocked_by = []
        return Issue(
            number=number,
            repo=repo,
            labels=labels,
            blocked_by=blocked_by,
            labeled_by_trusted=labeled_by_trusted,
            priority=priority,
        )

    return _build


@pytest.fixture
def make_config():
    """Return a builder for ``Config`` objects.

    The builder defaults to an ENABLED configuration — kill switch off and a
    single-repo allowlist (``["infra"]``) — so policy and state-machine tests
    exercise real behaviour. The disabled production default is covered
    separately in the config tests. Extra keyword arguments are forwarded to
    ``Config`` unchanged.
    """
    def _build(
        allowlist: list[str] | None = None,
        kill_switch: bool = False,
        **overrides,
    ) -> Config:
        if allowlist is None:
            allowlist = ["infra"]
        return Config(allowlist=allowlist, kill_switch=kill_switch, **overrides)

    return _build


@pytest.fixture
def make_run_state():
    """Return a builder for ``RunState`` objects.

    The defaults describe a run that has only just been dispatched: thread
    status ``RUNNING``, no CI status, nothing pushed, zero fix-forward
    attempts and zero elapsed seconds.
    """
    def _build(
        thread_status: ThreadStatus | None = ThreadStatus.RUNNING,
        ci_status: CIStatus | None = None,
        pushed: bool = False,
        fix_forward_attempts: int = 0,
        elapsed_seconds: float = 0.0,
    ) -> RunState:
        fields = {
            "thread_status": thread_status,
            "ci_status": ci_status,
            "pushed": pushed,
            "fix_forward_attempts": fix_forward_attempts,
            "elapsed_seconds": elapsed_seconds,
        }
        return RunState(**fields)

    return _build


class FakeT3Client:
    """Test double for ``t3_client.T3Client`` that keeps everything in memory.

    Each ``dispatch`` call is appended to ``dispatched`` and answered with a
    sequential id (``thread-0``, ``thread-1``, ...). ``snapshot`` hands back
    the object most recently given to ``set_snapshot``; before any staging it
    is ``{"threads": []}``.
    """

    def __init__(self) -> None:
        self._next_id = 0
        self._snapshot: dict = {"threads": []}
        self.dispatched: list[dict] = []

    def dispatch(self, repo: str, issue: int, prompt: str) -> str:
        """Record the dispatch and return the thread id assigned to it."""
        sequence = self._next_id
        self._next_id = sequence + 1
        thread_id = f"thread-{sequence}"
        record = dict(repo=repo, issue=issue, prompt=prompt, thread_id=thread_id)
        self.dispatched.append(record)
        return thread_id

    def snapshot(self) -> dict:
        """Return the staged snapshot object itself (not a copy)."""
        return self._snapshot

    def set_snapshot(self, snapshot: dict) -> None:
        """Stage the object that later ``snapshot`` calls will return."""
        self._snapshot = snapshot


class FakeTracker:
    """Test double for ``tracker.Tracker`` that keeps everything in memory.

    ``seed`` stages the issues that ``list_ready`` will report for a repo.
    The label, comment and close methods perform no I/O; they only append to
    ``label_ops``, ``comments`` and ``closed`` so tests can assert on them.
    """

    def __init__(self) -> None:
        self._ready: dict[str, list[Issue]] = {}
        # Entries are (op, repo, issue, label) with op "add" or "remove".
        self.label_ops: list[tuple[str, str, int, str]] = []
        self.comments: list[tuple[str, int, str]] = []
        self.closed: list[tuple[str, int]] = []

    def seed(self, repo: str, issues: list[Issue]) -> None:
        """Stage ``issues`` as the ready set for ``repo``, replacing any prior one."""
        self._ready[repo] = issues

    def list_ready(self, repos: list[str]) -> list[Issue]:
        """Return the staged issues of ``repos``, concatenated in the order given.

        Repos that were never seeded contribute nothing.
        """
        return [issue for repo in repos for issue in self._ready.get(repo, [])]

    def _record_label(self, op: str, repo: str, issue: int, label: str) -> None:
        """Append one label operation to ``label_ops``."""
        self.label_ops.append((op, repo, issue, label))

    def add_label(self, repo: str, issue: int, label: str) -> None:
        """Record an ``"add"`` label operation."""
        self._record_label("add", repo, issue, label)

    def remove_label(self, repo: str, issue: int, label: str) -> None:
        """Record a ``"remove"`` label operation."""
        self._record_label("remove", repo, issue, label)

    def comment(self, repo: str, issue: int, body: str) -> None:
        """Record a comment as ``(repo, issue, body)``."""
        entry = (repo, issue, body)
        self.comments.append(entry)

    def close(self, repo: str, issue: int) -> None:
        """Record a close as ``(repo, issue)``."""
        entry = (repo, issue)
        self.closed.append(entry)


class FakeCIWatcher:
    """Test double for ``ci_watcher.CIWatcher`` that keeps everything in memory.

    Statuses are staged per ``(repo, commit)`` pair with ``set_status``; a
    pair that was never staged reads back as ``CIStatus.PENDING``.
    """

    def __init__(self) -> None:
        self._statuses: dict[tuple[str, str], CIStatus] = {}

    def set_status(self, repo: str, commit: str, status: CIStatus) -> None:
        """Stage ``status`` for ``(repo, commit)``, overwriting any earlier value."""
        key = (repo, commit)
        self._statuses[key] = status

    def status(self, repo: str, commit: str) -> CIStatus:
        """Return the staged status for ``(repo, commit)``, or PENDING if none."""
        key = (repo, commit)
        return self._statuses.get(key, CIStatus.PENDING)


class FakeNotifier:
    """Test double for ``notifier.Notifier`` that keeps everything in memory.

    Nothing is delivered anywhere: each ``notify`` call is appended to
    ``sent`` as a dict, so tests can assert that an escalation fired with the
    expected kind and detail.
    """

    def __init__(self) -> None:
        self.sent: list[dict] = []

    def notify(self, kind: str, issue: Issue, thread_id: str | None, detail: str) -> None:
        """Record one notification with keys kind, issue, thread_id, detail."""
        record = dict(kind=kind, issue=issue, thread_id=thread_id, detail=detail)
        self.sent.append(record)


@pytest.fixture
def fake_t3() -> FakeT3Client:
    """Provide a newly constructed ``FakeT3Client``."""
    client = FakeT3Client()
    return client


@pytest.fixture
def fake_tracker() -> FakeTracker:
    """Provide a newly constructed ``FakeTracker`` with nothing seeded."""
    tracker = FakeTracker()
    return tracker


@pytest.fixture
def fake_ci() -> FakeCIWatcher:
    """Provide a newly constructed ``FakeCIWatcher`` with no statuses staged."""
    watcher = FakeCIWatcher()
    return watcher


@pytest.fixture
def fake_notifier() -> FakeNotifier:
    """Provide a newly constructed ``FakeNotifier`` with nothing sent."""
    notifier = FakeNotifier()
    return notifier
parallel execution: replace single-flight lock with bounded semaphore + per-job workspace Multiple agent calls now run concurrently, each in its own isolated git checkout (local clone of the warm base, hardlinked objects, git-crypt re-unlocked), so concurrent jobs never share a working tree. - execution_lock (asyncio.Lock) -> execution_semaphore (default MAX_CONCURRENCY=10); excess calls queue FIFO instead of 409/503. MAX_QUEUE_DEPTH safety valve. - /execute never returns 409; jobs go queued -> running. Timeout covers execution only, not queue wait. - /v1/chat/completions queues for a slot instead of 503-busy. - /health: busy = at-capacity, plus active/queued/capacity fields. - per-job workspace prepare/cleanup under a short git lock; the agent run holds none. - in-memory job registry evicted past JOB_TTL_SECONDS. Design: docs/2026-06-02-parallel-execution-design.md Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-02 20:57:41 +00:00			`import asyncio`
Initial extraction from monorepo 2026-05-07 17:07:12 +00:00			`import os`

			`os.environ.setdefault("API_BEARER_TOKEN", "test-token")`
			`os.environ.setdefault("WORKSPACE_DIR", "/tmp/test-workspace")`
parallel execution: replace single-flight lock with bounded semaphore + per-job workspace Multiple agent calls now run concurrently, each in its own isolated git checkout (local clone of the warm base, hardlinked objects, git-crypt re-unlocked), so concurrent jobs never share a working tree. - execution_lock (asyncio.Lock) -> execution_semaphore (default MAX_CONCURRENCY=10); excess calls queue FIFO instead of 409/503. MAX_QUEUE_DEPTH safety valve. - /execute never returns 409; jobs go queued -> running. Timeout covers execution only, not queue wait. - /v1/chat/completions queues for a slot instead of 503-busy. - /health: busy = at-capacity, plus active/queued/capacity fields. - per-job workspace prepare/cleanup under a short git lock; the agent run holds none. - in-memory job registry evicted past JOB_TTL_SECONDS. Design: docs/2026-06-02-parallel-execution-design.md Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-02 20:57:41 +00:00
			`import pytest`

			`from app import main as app_main`


			`@pytest.fixture(autouse=True)`
			`def _reset_execution_state():`
			`"""Reset concurrency state between tests.`

			`A fresh semaphore per test avoids the "bound to a different event loop"`
			`error (pytest-asyncio uses a new loop per function), and clearing the`
			`counters/jobs keeps tests independent.`
			`"""`
			`app_main.jobs.clear()`
			`app_main.inflight_active = 0`
			`app_main.inflight_queued = 0`
			`app_main.execution_semaphore = asyncio.Semaphore(app_main.MAX_CONCURRENCY)`
			`app_main._last_fetch_epoch = 0.0`
			`app_main.MAX_QUEUE_DEPTH = int(os.environ.get("MAX_QUEUE_DEPTH", "100"))`
			`yield`


			`@pytest.fixture`
			`def drain():`
			`"""Wait for all background /execute jobs to finish.`

			Tests that fire `/execute` must drain before leaving the `patch(...)`
			`context — otherwise a background task resumes after the mocks are torn`
			`down, spawns a real subprocess during loop teardown, and deadlocks the`
			`asyncio child-watcher.`
			`"""`
			`async def _drain(timeout: float = 3.0):`
			`loop = asyncio.get_event_loop()`
			`deadline = loop.time() + timeout`
			`while app_main.inflight_active or app_main.inflight_queued:`
			`if loop.time() > deadline:`
			`break`
			`await asyncio.sleep(0.01)`
			`return _drain`
afk: add the autonomous issue-implementer loop (SHIPS DISABLED) Adds app/afk/ — the "away-from-keyboard" control plane that watches the issue tracker for ready-for-agent issues, dispatches each to a fresh full-access T3 thread (with the issue-implementer preamble prepended, because T3 does not honour ~/.claude/CLAUDE.md), and drives the resulting run through its lifecycle: tests-red -> green -> pushed -> CI -> deployed, escalating or fix-forwarding via a small pure state machine. The loop is split into pure cores (no I/O, exhaustively unit-tested) and thin injected adapters (the only edges that ever touch T3, the tracker, CI, or Slack — faked in every test, so nothing here talks to a real server, GitHub/Forgejo, or the cluster): pure: types, dispatch_policy, run_state_machine, phase_checklist, config, issue_implementer_prompt adapters: t3_client (two-POST dispatch + snapshot), tracker, ci_watcher, notifier loops: poller — CronJob tick #1: list_ready -> select_dispatchable -> dispatch + stamp the in-progress lock (label only AFTER a successful dispatch, so a failed dispatch never leaves a phantom lock). Per-repo lock derived from the ready set, since the CronJob is stateless between ticks. watcher — CronJob tick #2: assemble RunState from snapshot + CI -> next_action -> act (close on success; relabel ready-for-human + ring the doorbell on the two escalations; dispatch a corrective turn on fix-forward; refresh the progress checklist). SHIPS DISABLED, on purpose: Config defaults to kill_switch=True AND an empty allowlist, so a freshly-loaded config dispatches nothing and does zero I/O. The package is not imported by the running service and has no auto-enable path. Arming it is a deliberate, later, manual step requiring BOTH gates (clear the kill switch AND enrol the exact repos) so one fat-fingered env var can't arm every repo. Test-first throughout: 412 tests pass (poller + watcher add integration tests wiring the real pure cores to in-memory fakes). mypy clean. 
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-15 21:15:11 +00:00

			`# --------------------------------------------------------------------------- #`
			`# AFK loop fixtures.`
			`#`
			`# Shared factories + in-memory fakes for the app.afk modules. EVERYTHING the AFK`
			`# tests touch is faked here — no test ever reaches a real T3 server, GitHub /`
			`# Forgejo, or the cluster. The fakes implement the module interfaces from the`
			`# contract and record their calls so tests can assert on them.`
			`# --------------------------------------------------------------------------- #`
			`from app.afk.types import ( # noqa: E402 (after the env setup above, like app_main)`
			`CIStatus,`
			`Config,`
			`Issue,`
			`RunState,`
			`ThreadStatus,`
			`)`


			`@pytest.fixture`
			`def make_issue():`
			"""Factory for ``Issue``. Defaults to a clean, dispatchable issue (trusted
			`label, nothing blocking); override any field per test."""`
			`def _make(`
			`number: int = 1,`
			`repo: str = "infra",`
			`labels: list[str] \| None = None,`
			`blocked_by: list[int] \| None = None,`
			`labeled_by_trusted: bool = True,`
			`priority: int = 0,`
			`) -> Issue:`
			`return Issue(`
			`number=number,`
			`repo=repo,`
			`labels=["ready-for-agent"] if labels is None else labels,`
			`blocked_by=[] if blocked_by is None else blocked_by,`
			`labeled_by_trusted=labeled_by_trusted,`
			`priority=priority,`
			`)`
			`return _make`


			`@pytest.fixture`
			`def make_config():`
			"""Factory for ``Config``. Defaults to an ENABLED config (kill switch off,
			`a one-repo allowlist) so policy/state-machine tests exercise real behaviour;`
			`the disabled production default is covered separately in the config tests."""`
			`def _make(`
			`allowlist: list[str] \| None = None,`
			`kill_switch: bool = False,`
			`**overrides,`
			`) -> Config:`
			`return Config(`
			`allowlist=["infra"] if allowlist is None else allowlist,`
			`kill_switch=kill_switch,`
			`**overrides,`
			`)`
			`return _make`


			`@pytest.fixture`
			`def make_run_state():`
			"""Factory for ``RunState``. Defaults to a freshly-dispatched run (thread
			`running, nothing pushed, no CI, no fix-forward attempts yet)."""`
			`def _make(`
			`thread_status: ThreadStatus \| None = ThreadStatus.RUNNING,`
			`ci_status: CIStatus \| None = None,`
			`pushed: bool = False,`
			`fix_forward_attempts: int = 0,`
			`elapsed_seconds: float = 0.0,`
			`) -> RunState:`
			`return RunState(`
			`thread_status=thread_status,`
			`ci_status=ci_status,`
			`pushed=pushed,`
			`fix_forward_attempts=fix_forward_attempts,`
			`elapsed_seconds=elapsed_seconds,`
			`)`
			`return _make`


			`class FakeT3Client:`
			"""In-memory stand-in for ``t3_client.T3Client``. Records each dispatch and
			hands back a deterministic thread id; ``snapshot`` returns whatever was
			staged via ``set_snapshot``."""

			`def __init__(self) -> None:`
			`self.dispatched: list[dict] = []`
			`self._snapshot: dict = {"threads": []}`
			`self._next_id = 0`

			`def dispatch(self, repo: str, issue: int, prompt: str) -> str:`
			`thread_id = f"thread-{self._next_id}"`
			`self._next_id += 1`
			`self.dispatched.append(`
			`{"repo": repo, "issue": issue, "prompt": prompt, "thread_id": thread_id}`
			`)`
			`return thread_id`

			`def snapshot(self) -> dict:`
			`return self._snapshot`

			`def set_snapshot(self, snapshot: dict) -> None:`
			`self._snapshot = snapshot`


			`class FakeTracker:`
			"""In-memory stand-in for ``tracker.Tracker``. ``list_ready`` returns issues
			staged via ``seed``; label/comment/close just record their calls."""

			`def __init__(self) -> None:`
			`self._ready: dict[str, list[Issue]] = {}`
			`self.label_ops: list[tuple[str, str, int, str]] = [] # (op, repo, issue, label)`
			`self.comments: list[tuple[str, int, str]] = []`
			`self.closed: list[tuple[str, int]] = []`

			`def seed(self, repo: str, issues: list[Issue]) -> None:`
			`self._ready[repo] = issues`

			`def list_ready(self, repos: list[str]) -> list[Issue]:`
			`out: list[Issue] = []`
			`for repo in repos:`
			`out.extend(self._ready.get(repo, []))`
			`return out`

			`def add_label(self, repo: str, issue: int, label: str) -> None:`
			`self.label_ops.append(("add", repo, issue, label))`

			`def remove_label(self, repo: str, issue: int, label: str) -> None:`
			`self.label_ops.append(("remove", repo, issue, label))`

			`def comment(self, repo: str, issue: int, body: str) -> None:`
			`self.comments.append((repo, issue, body))`

			`def close(self, repo: str, issue: int) -> None:`
			`self.closed.append((repo, issue))`


			`class FakeCIWatcher:`
			"""In-memory stand-in for ``ci_watcher.CIWatcher``. Returns the status staged
			per ``(repo, commit)`` via ``set_status``; unknown commits read PENDING."""

			`def __init__(self) -> None:`
			`self._statuses: dict[tuple[str, str], CIStatus] = {}`

			`def set_status(self, repo: str, commit: str, status: CIStatus) -> None:`
			`self._statuses[(repo, commit)] = status`

			`def status(self, repo: str, commit: str) -> CIStatus:`
			`return self._statuses.get((repo, commit), CIStatus.PENDING)`


			`class FakeNotifier:`
			"""In-memory stand-in for ``notifier.Notifier``. Records every notification
			`so tests can assert escalations fired with the right kind/detail."""`

			`def __init__(self) -> None:`
			`self.sent: list[dict] = []`

			`def notify(self, kind: str, issue: Issue, thread_id: str \| None, detail: str) -> None:`
			`self.sent.append(`
			`{"kind": kind, "issue": issue, "thread_id": thread_id, "detail": detail}`
			`)`


			`@pytest.fixture`
			`def fake_t3() -> FakeT3Client:`
			`return FakeT3Client()`


			`@pytest.fixture`
			`def fake_tracker() -> FakeTracker:`
			`return FakeTracker()`


			`@pytest.fixture`
			`def fake_ci() -> FakeCIWatcher:`
			`return FakeCIWatcher()`


			`@pytest.fixture`
			`def fake_notifier() -> FakeNotifier:`
			`return FakeNotifier()`