2026-06-02 20:57:41 +00:00
|
|
|
import asyncio
|
2026-05-07 17:07:12 +00:00
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
os.environ.setdefault("API_BEARER_TOKEN", "test-token")
|
|
|
|
|
os.environ.setdefault("WORKSPACE_DIR", "/tmp/test-workspace")
|
2026-06-02 20:57:41 +00:00
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
from app import main as app_main
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
|
|
|
def _reset_execution_state():
|
|
|
|
|
"""Reset concurrency state between tests.
|
|
|
|
|
|
|
|
|
|
A fresh semaphore per test avoids the "bound to a different event loop"
|
|
|
|
|
error (pytest-asyncio uses a new loop per function), and clearing the
|
|
|
|
|
counters/jobs keeps tests independent.
|
|
|
|
|
"""
|
|
|
|
|
app_main.jobs.clear()
|
|
|
|
|
app_main.inflight_active = 0
|
|
|
|
|
app_main.inflight_queued = 0
|
|
|
|
|
app_main.execution_semaphore = asyncio.Semaphore(app_main.MAX_CONCURRENCY)
|
|
|
|
|
app_main._last_fetch_epoch = 0.0
|
|
|
|
|
app_main.MAX_QUEUE_DEPTH = int(os.environ.get("MAX_QUEUE_DEPTH", "100"))
|
|
|
|
|
yield
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def drain():
|
|
|
|
|
"""Wait for all background /execute jobs to finish.
|
|
|
|
|
|
|
|
|
|
Tests that fire `/execute` must drain before leaving the `patch(...)`
|
|
|
|
|
context — otherwise a background task resumes after the mocks are torn
|
|
|
|
|
down, spawns a real subprocess during loop teardown, and deadlocks the
|
|
|
|
|
asyncio child-watcher.
|
|
|
|
|
"""
|
|
|
|
|
async def _drain(timeout: float = 3.0):
|
|
|
|
|
loop = asyncio.get_event_loop()
|
|
|
|
|
deadline = loop.time() + timeout
|
|
|
|
|
while app_main.inflight_active or app_main.inflight_queued:
|
|
|
|
|
if loop.time() > deadline:
|
|
|
|
|
break
|
|
|
|
|
await asyncio.sleep(0.01)
|
|
|
|
|
return _drain
|
afk: add the autonomous issue-implementer loop (SHIPS DISABLED)
Adds app/afk/ — the "away-from-keyboard" control plane that watches the
issue tracker for ready-for-agent issues, dispatches each to a fresh
full-access T3 thread (with the issue-implementer preamble prepended,
because T3 does not honour ~/.claude/CLAUDE.md), and drives the resulting
run through its lifecycle: tests-red -> green -> pushed -> CI -> deployed,
escalating or fix-forwarding via a small pure state machine.
The loop is split into pure cores (no I/O, exhaustively unit-tested) and
thin injected adapters (the only edges that ever touch T3, the tracker,
CI, or Slack — faked in every test, so nothing here talks to a real
server, GitHub/Forgejo, or the cluster):
pure: types, dispatch_policy, run_state_machine, phase_checklist,
config, issue_implementer_prompt
adapters: t3_client (two-POST dispatch + snapshot), tracker, ci_watcher,
notifier
loops: poller — CronJob tick #1: list_ready -> select_dispatchable
-> dispatch + stamp the in-progress lock (label only
AFTER a successful dispatch, so a failed dispatch
never leaves a phantom lock). Per-repo lock derived
from the ready set, since the CronJob is stateless
between ticks.
watcher — CronJob tick #2: assemble RunState from snapshot +
CI -> next_action -> act (close on success; relabel
ready-for-human + ring the doorbell on the two
escalations; dispatch a corrective turn on
fix-forward; refresh the progress checklist).
SHIPS DISABLED, on purpose: Config defaults to kill_switch=True AND an
empty allowlist, so a freshly-loaded config dispatches nothing and does
zero I/O. The package is not imported by the running service and has no
auto-enable path. Arming it is a deliberate, later, manual step requiring
BOTH gates (clear the kill switch AND enrol the exact repos) so one
fat-fingered env var can't arm every repo.
Test-first throughout: 412 tests pass (poller + watcher add integration
tests wiring the real pure cores to in-memory fakes). mypy clean.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 21:15:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
# AFK loop fixtures.
|
|
|
|
|
#
|
|
|
|
|
# Shared factories + in-memory fakes for the app.afk modules. EVERYTHING the AFK
|
|
|
|
|
# tests touch is faked here — no test ever reaches a real T3 server, GitHub /
|
|
|
|
|
# Forgejo, or the cluster. The fakes implement the module interfaces from the
|
|
|
|
|
# contract and record their calls so tests can assert on them.
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
from app.afk.types import ( # noqa: E402 (after the env setup above, like app_main)
|
|
|
|
|
CIStatus,
|
|
|
|
|
Config,
|
|
|
|
|
Issue,
|
|
|
|
|
RunState,
|
|
|
|
|
ThreadStatus,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def make_issue():
|
|
|
|
|
"""Factory for ``Issue``. Defaults to a clean, dispatchable issue (trusted
|
|
|
|
|
label, nothing blocking); override any field per test."""
|
|
|
|
|
def _make(
|
|
|
|
|
number: int = 1,
|
|
|
|
|
repo: str = "infra",
|
|
|
|
|
labels: list[str] | None = None,
|
|
|
|
|
blocked_by: list[int] | None = None,
|
|
|
|
|
labeled_by_trusted: bool = True,
|
|
|
|
|
priority: int = 0,
|
|
|
|
|
) -> Issue:
|
|
|
|
|
return Issue(
|
|
|
|
|
number=number,
|
|
|
|
|
repo=repo,
|
|
|
|
|
labels=["ready-for-agent"] if labels is None else labels,
|
|
|
|
|
blocked_by=[] if blocked_by is None else blocked_by,
|
|
|
|
|
labeled_by_trusted=labeled_by_trusted,
|
|
|
|
|
priority=priority,
|
|
|
|
|
)
|
|
|
|
|
return _make
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def make_config():
|
|
|
|
|
"""Factory for ``Config``. Defaults to an ENABLED config (kill switch off,
|
|
|
|
|
a one-repo allowlist) so policy/state-machine tests exercise real behaviour;
|
|
|
|
|
the disabled production default is covered separately in the config tests."""
|
|
|
|
|
def _make(
|
|
|
|
|
allowlist: list[str] | None = None,
|
|
|
|
|
kill_switch: bool = False,
|
|
|
|
|
**overrides,
|
|
|
|
|
) -> Config:
|
|
|
|
|
return Config(
|
|
|
|
|
allowlist=["infra"] if allowlist is None else allowlist,
|
|
|
|
|
kill_switch=kill_switch,
|
|
|
|
|
**overrides,
|
|
|
|
|
)
|
|
|
|
|
return _make
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def make_run_state():
|
|
|
|
|
"""Factory for ``RunState``. Defaults to a freshly-dispatched run (thread
|
|
|
|
|
running, nothing pushed, no CI, no fix-forward attempts yet)."""
|
|
|
|
|
def _make(
|
|
|
|
|
thread_status: ThreadStatus | None = ThreadStatus.RUNNING,
|
|
|
|
|
ci_status: CIStatus | None = None,
|
|
|
|
|
pushed: bool = False,
|
|
|
|
|
fix_forward_attempts: int = 0,
|
|
|
|
|
elapsed_seconds: float = 0.0,
|
|
|
|
|
) -> RunState:
|
|
|
|
|
return RunState(
|
|
|
|
|
thread_status=thread_status,
|
|
|
|
|
ci_status=ci_status,
|
|
|
|
|
pushed=pushed,
|
|
|
|
|
fix_forward_attempts=fix_forward_attempts,
|
|
|
|
|
elapsed_seconds=elapsed_seconds,
|
|
|
|
|
)
|
|
|
|
|
return _make
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeT3Client:
|
|
|
|
|
"""In-memory stand-in for ``t3_client.T3Client``. Records each dispatch and
|
|
|
|
|
hands back a deterministic thread id; ``snapshot`` returns whatever was
|
|
|
|
|
staged via ``set_snapshot``."""
|
|
|
|
|
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self.dispatched: list[dict] = []
|
|
|
|
|
self._snapshot: dict = {"threads": []}
|
|
|
|
|
self._next_id = 0
|
|
|
|
|
|
|
|
|
|
def dispatch(self, repo: str, issue: int, prompt: str) -> str:
|
|
|
|
|
thread_id = f"thread-{self._next_id}"
|
|
|
|
|
self._next_id += 1
|
|
|
|
|
self.dispatched.append(
|
|
|
|
|
{"repo": repo, "issue": issue, "prompt": prompt, "thread_id": thread_id}
|
|
|
|
|
)
|
|
|
|
|
return thread_id
|
|
|
|
|
|
|
|
|
|
def snapshot(self) -> dict:
|
|
|
|
|
return self._snapshot
|
|
|
|
|
|
|
|
|
|
def set_snapshot(self, snapshot: dict) -> None:
|
|
|
|
|
self._snapshot = snapshot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeTracker:
|
|
|
|
|
"""In-memory stand-in for ``tracker.Tracker``. ``list_ready`` returns issues
|
|
|
|
|
staged via ``seed``; label/comment/close just record their calls."""
|
|
|
|
|
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self._ready: dict[str, list[Issue]] = {}
|
|
|
|
|
self.label_ops: list[tuple[str, str, int, str]] = [] # (op, repo, issue, label)
|
|
|
|
|
self.comments: list[tuple[str, int, str]] = []
|
|
|
|
|
self.closed: list[tuple[str, int]] = []
|
|
|
|
|
|
|
|
|
|
def seed(self, repo: str, issues: list[Issue]) -> None:
|
|
|
|
|
self._ready[repo] = issues
|
|
|
|
|
|
|
|
|
|
def list_ready(self, repos: list[str]) -> list[Issue]:
|
|
|
|
|
out: list[Issue] = []
|
|
|
|
|
for repo in repos:
|
|
|
|
|
out.extend(self._ready.get(repo, []))
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
def add_label(self, repo: str, issue: int, label: str) -> None:
|
|
|
|
|
self.label_ops.append(("add", repo, issue, label))
|
|
|
|
|
|
|
|
|
|
def remove_label(self, repo: str, issue: int, label: str) -> None:
|
|
|
|
|
self.label_ops.append(("remove", repo, issue, label))
|
|
|
|
|
|
|
|
|
|
def comment(self, repo: str, issue: int, body: str) -> None:
|
|
|
|
|
self.comments.append((repo, issue, body))
|
|
|
|
|
|
|
|
|
|
def close(self, repo: str, issue: int) -> None:
|
|
|
|
|
self.closed.append((repo, issue))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeCIWatcher:
|
|
|
|
|
"""In-memory stand-in for ``ci_watcher.CIWatcher``. Returns the status staged
|
|
|
|
|
per ``(repo, commit)`` via ``set_status``; unknown commits read PENDING."""
|
|
|
|
|
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self._statuses: dict[tuple[str, str], CIStatus] = {}
|
|
|
|
|
|
|
|
|
|
def set_status(self, repo: str, commit: str, status: CIStatus) -> None:
|
|
|
|
|
self._statuses[(repo, commit)] = status
|
|
|
|
|
|
|
|
|
|
def status(self, repo: str, commit: str) -> CIStatus:
|
|
|
|
|
return self._statuses.get((repo, commit), CIStatus.PENDING)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeNotifier:
|
|
|
|
|
"""In-memory stand-in for ``notifier.Notifier``. Records every notification
|
|
|
|
|
so tests can assert escalations fired with the right kind/detail."""
|
|
|
|
|
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self.sent: list[dict] = []
|
|
|
|
|
|
|
|
|
|
def notify(self, kind: str, issue: Issue, thread_id: str | None, detail: str) -> None:
|
|
|
|
|
self.sent.append(
|
|
|
|
|
{"kind": kind, "issue": issue, "thread_id": thread_id, "detail": detail}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def fake_t3() -> FakeT3Client:
|
|
|
|
|
return FakeT3Client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def fake_tracker() -> FakeTracker:
|
|
|
|
|
return FakeTracker()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def fake_ci() -> FakeCIWatcher:
|
|
|
|
|
return FakeCIWatcher()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
|
def fake_notifier() -> FakeNotifier:
|
|
|
|
|
return FakeNotifier()
|