claude-agent-service/tests/conftest.py
Viktor Barzin 2ef0db9a96 afk: add the autonomous issue-implementer loop (SHIPS DISABLED)
Adds app/afk/ — the "away-from-keyboard" control plane that watches the
issue tracker for ready-for-agent issues, dispatches each to a fresh
full-access T3 thread (with the issue-implementer preamble prepended,
because T3 does not honour ~/.claude/CLAUDE.md), and drives the resulting
run through its lifecycle: tests-red -> green -> pushed -> CI -> deployed,
escalating or fix-forwarding via a small pure state machine.

The loop is split into pure cores (no I/O, exhaustively unit-tested) and
thin injected adapters (the only edges that ever touch T3, the tracker,
CI, or Slack — faked in every test, so nothing here talks to a real
server, GitHub/Forgejo, or the cluster):

  pure:     types, dispatch_policy, run_state_machine, phase_checklist,
            config, issue_implementer_prompt
  adapters: t3_client (two-POST dispatch + snapshot), tracker, ci_watcher,
            notifier
  loops:    poller  — CronJob tick #1: list_ready -> select_dispatchable
                      -> dispatch + stamp the in-progress lock (label only
                      AFTER a successful dispatch, so a failed dispatch
                      never leaves a phantom lock). Per-repo lock derived
                      from the ready set, since the CronJob is stateless
                      between ticks.
            watcher — CronJob tick #2: assemble RunState from snapshot +
                      CI -> next_action -> act (close on success; relabel
                      ready-for-human + ring the doorbell on the two
                      escalations; dispatch a corrective turn on
                      fix-forward; refresh the progress checklist).

SHIPS DISABLED, on purpose: Config defaults to kill_switch=True AND an
empty allowlist, so a freshly-loaded config dispatches nothing and does
zero I/O. The package is not imported by the running service and has no
auto-enable path. Arming it is a deliberate, later, manual step requiring
BOTH gates (clear the kill switch AND enrol the exact repos) so one
fat-fingered env var can't arm every repo.

Test-first throughout: 412 tests pass (poller + watcher add integration
tests wiring the real pure cores to in-memory fakes). mypy clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 21:15:11 +00:00

228 lines
7.2 KiB
Python

import asyncio
import os
# Seed the environment BEFORE the application import below. setdefault only
# fills in a value when the variable is not already exported, so a developer's
# own environment still wins.
# NOTE(review): presumably app.main reads these two variables at import time,
# which is why they are set ahead of the import — confirm against app/main.py.
os.environ.setdefault("API_BEARER_TOKEN", "test-token")
os.environ.setdefault("WORKSPACE_DIR", "/tmp/test-workspace")
import pytest
# Imported as a module (not individual names) so fixtures can rebind its
# module-level state in place.
from app import main as app_main
@pytest.fixture(autouse=True)
def _reset_execution_state():
    """Put ``app_main``'s module-level execution state back to a clean slate.

    Runs automatically before every test. The semaphore is rebuilt rather than
    reused because pytest-asyncio gives each test function its own event loop,
    and a semaphore carried over from an earlier loop fails with "bound to a
    different event loop". Emptying the job table and zeroing the in-flight
    counters stops one test's leftovers from leaking into the next.
    """
    # Job registry and in-flight counters.
    app_main.jobs.clear()
    app_main.inflight_active = app_main.inflight_queued = 0

    # Fresh concurrency gate for this test's event loop.
    concurrency_gate = asyncio.Semaphore(app_main.MAX_CONCURRENCY)
    app_main.execution_semaphore = concurrency_gate

    app_main._last_fetch_epoch = 0.0

    # Re-read the queue limit from the environment so a value a previous test
    # assigned to the module does not stick.
    queue_limit = os.environ.get("MAX_QUEUE_DEPTH", "100")
    app_main.MAX_QUEUE_DEPTH = int(queue_limit)

    yield
@pytest.fixture
def drain():
    """Return an awaitable helper that waits for background /execute jobs.

    Tests that fire ``/execute`` must await the returned coroutine function
    before leaving their ``patch(...)`` context. Otherwise a background task
    can resume after the mocks are torn down, spawn a real subprocess during
    loop teardown, and deadlock the asyncio child-watcher.

    The helper accepts ``timeout`` (seconds, default 3.0). It polls
    ``app_main.inflight_active`` / ``app_main.inflight_queued`` every 10 ms
    and returns ``None`` once both are falsy, or once the deadline has passed.
    Hitting the deadline is silent (best-effort): no exception is raised.
    """

    async def _drain(timeout: float = 3.0):
        # Inside a coroutine there is always a running loop; get_running_loop()
        # is the documented way to reach it and never creates a loop implicitly.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while app_main.inflight_active or app_main.inflight_queued:
            if loop.time() > deadline:
                # Give up quietly rather than fail the test from the helper.
                break
            await asyncio.sleep(0.01)

    return _drain
# --------------------------------------------------------------------------- #
# AFK loop fixtures.
#
# Shared factories + in-memory fakes for the app.afk modules. EVERYTHING the AFK
# tests touch is faked here — no test ever reaches a real T3 server, GitHub /
# Forgejo, or the cluster. The fakes implement the module interfaces from the
# contract and record their calls so tests can assert on them.
# --------------------------------------------------------------------------- #
from app.afk.types import ( # noqa: E402 (after the env setup above, like app_main)
CIStatus,
Config,
Issue,
RunState,
ThreadStatus,
)
@pytest.fixture
def make_issue():
    """Provide a builder for ``Issue`` objects.

    With no arguments the builder yields issue #1 in repo ``"infra"`` carrying
    only the ``ready-for-agent`` label, blocked by nothing, labelled by a
    trusted user, priority 0. Any field can be overridden per call.
    """

    def _make(
        number: int = 1,
        repo: str = "infra",
        labels: list[str] | None = None,
        blocked_by: list[int] | None = None,
        labeled_by_trusted: bool = True,
        priority: int = 0,
    ) -> Issue:
        # None means "use the default"; a fresh list is built on every call so
        # issues never share a mutable default.
        if labels is None:
            labels = ["ready-for-agent"]
        if blocked_by is None:
            blocked_by = []
        return Issue(
            number=number,
            repo=repo,
            labels=labels,
            blocked_by=blocked_by,
            labeled_by_trusted=labeled_by_trusted,
            priority=priority,
        )

    return _make
@pytest.fixture
def make_config():
    """Provide a builder for ``Config`` objects.

    Unlike the disabled production default (covered in the config tests), the
    builder's defaults describe an ENABLED config: kill switch off and an
    allowlist containing just ``"infra"``. That lets policy and state-machine
    tests exercise real behaviour. Extra keyword arguments are forwarded to
    ``Config`` unchanged.
    """

    def _make(
        allowlist: list[str] | None = None,
        kill_switch: bool = False,
        **overrides,
    ) -> Config:
        # A new one-repo list per call when the caller did not supply one.
        enrolled_repos = allowlist
        if enrolled_repos is None:
            enrolled_repos = ["infra"]
        return Config(allowlist=enrolled_repos, kill_switch=kill_switch, **overrides)

    return _make
@pytest.fixture
def make_run_state():
    """Provide a builder for ``RunState`` objects.

    The defaults model a run that has only just been dispatched: the thread is
    ``RUNNING``, nothing has been pushed, there is no CI status, no fix-forward
    attempt has been made and no time has elapsed.
    """

    def _make(
        thread_status: ThreadStatus | None = ThreadStatus.RUNNING,
        ci_status: CIStatus | None = None,
        pushed: bool = False,
        fix_forward_attempts: int = 0,
        elapsed_seconds: float = 0.0,
    ) -> RunState:
        # Collect the fields first, then hand them over as keyword arguments.
        fields = {
            "thread_status": thread_status,
            "ci_status": ci_status,
            "pushed": pushed,
            "fix_forward_attempts": fix_forward_attempts,
            "elapsed_seconds": elapsed_seconds,
        }
        return RunState(**fields)

    return _make
class FakeT3Client:
    """Test double for ``t3_client.T3Client`` that never leaves the process.

    Every ``dispatch`` call is appended to ``dispatched`` and answered with a
    predictable id (``thread-0``, ``thread-1``, ...). ``snapshot`` hands back
    whatever object was last given to ``set_snapshot``; until then it is
    ``{"threads": []}``.
    """

    def __init__(self) -> None:
        self._next_id = 0
        self._snapshot: dict = {"threads": []}
        self.dispatched: list[dict] = []

    def dispatch(self, repo: str, issue: int, prompt: str) -> str:
        """Record the dispatch and return the thread id assigned to it."""
        sequence = self._next_id
        self._next_id = sequence + 1
        record = {
            "repo": repo,
            "issue": issue,
            "prompt": prompt,
            "thread_id": f"thread-{sequence}",
        }
        self.dispatched.append(record)
        return record["thread_id"]

    def snapshot(self) -> dict:
        """Return the currently staged snapshot (the same object, not a copy)."""
        return self._snapshot

    def set_snapshot(self, snapshot: dict) -> None:
        """Stage the snapshot that later ``snapshot`` calls will return."""
        self._snapshot = snapshot
class FakeTracker:
    """Test double for ``tracker.Tracker`` backed by plain lists and a dict.

    ``seed`` stages the ready issues for a repo and ``list_ready`` reads them
    back. The mutating calls (labels, comments, close) change nothing; they
    only append to ``label_ops`` / ``comments`` / ``closed`` so tests can
    assert on what was requested.
    """

    def __init__(self) -> None:
        self._ready: dict[str, list[Issue]] = {}
        # Each entry is (op, repo, issue, label) with op "add" or "remove".
        self.label_ops: list[tuple[str, str, int, str]] = []
        self.comments: list[tuple[str, int, str]] = []
        self.closed: list[tuple[str, int]] = []

    def seed(self, repo: str, issues: list[Issue]) -> None:
        """Stage ``issues`` as the ready set for ``repo``, replacing any earlier one."""
        self._ready[repo] = issues

    def list_ready(self, repos: list[str]) -> list[Issue]:
        """Return the staged issues of ``repos``, concatenated in the order given.

        Repos that were never seeded contribute nothing.
        """
        return [
            issue
            for repo in repos
            for issue in self._ready.get(repo, [])
        ]

    def add_label(self, repo: str, issue: int, label: str) -> None:
        """Record a request to add ``label``."""
        entry = ("add", repo, issue, label)
        self.label_ops.append(entry)

    def remove_label(self, repo: str, issue: int, label: str) -> None:
        """Record a request to remove ``label``."""
        entry = ("remove", repo, issue, label)
        self.label_ops.append(entry)

    def comment(self, repo: str, issue: int, body: str) -> None:
        """Record a comment posted on the issue."""
        entry = (repo, issue, body)
        self.comments.append(entry)

    def close(self, repo: str, issue: int) -> None:
        """Record that the issue was closed."""
        entry = (repo, issue)
        self.closed.append(entry)
class FakeCIWatcher:
    """Test double for ``ci_watcher.CIWatcher`` driven by a lookup table.

    ``set_status`` stages the status for a ``(repo, commit)`` pair and
    ``status`` reads it back. A pair that was never staged reads as
    ``CIStatus.PENDING``.
    """

    def __init__(self) -> None:
        self._statuses: dict[tuple[str, str], CIStatus] = {}

    def set_status(self, repo: str, commit: str, status: CIStatus) -> None:
        """Stage ``status`` for the commit, overwriting any earlier value."""
        key = (repo, commit)
        self._statuses[key] = status

    def status(self, repo: str, commit: str) -> CIStatus:
        """Return the staged status, or ``CIStatus.PENDING`` when none was staged."""
        key = (repo, commit)
        fallback = CIStatus.PENDING
        return self._statuses.get(key, fallback)
class FakeNotifier:
    """Test double for ``notifier.Notifier`` that sends nothing.

    Each ``notify`` call is stored in ``sent`` as a dict with the keys
    ``kind``, ``issue``, ``thread_id`` and ``detail``, so tests can check that
    an escalation fired with the expected kind and detail.
    """

    def __init__(self) -> None:
        self.sent: list[dict] = []

    def notify(self, kind: str, issue: Issue, thread_id: str | None, detail: str) -> None:
        """Append one notification record to ``sent``."""
        record = {
            "kind": kind,
            "issue": issue,
            "thread_id": thread_id,
            "detail": detail,
        }
        self.sent.append(record)
@pytest.fixture
def fake_t3() -> FakeT3Client:
    """Provide a newly constructed ``FakeT3Client``."""
    client = FakeT3Client()
    return client
@pytest.fixture
def fake_tracker() -> FakeTracker:
    """Provide a newly constructed ``FakeTracker``."""
    tracker = FakeTracker()
    return tracker
@pytest.fixture
def fake_ci() -> FakeCIWatcher:
    """Provide a newly constructed ``FakeCIWatcher``."""
    watcher = FakeCIWatcher()
    return watcher
@pytest.fixture
def fake_notifier() -> FakeNotifier:
    """Provide a newly constructed ``FakeNotifier``."""
    notifier = FakeNotifier()
    return notifier