claude-agent-service/app/breakglass/agent_session.py

"""Drive the breakglass Claude agent and stream its work to the browser.

Each chat turn runs ``claude -p --output-format stream-json`` in the session's
persistent workspace; the first turn opens the session with ``--session-id`` and
later turns ``--resume`` it, so the conversation has memory across turns. The
CLI's JSON events are translated to a small, stable SSE vocabulary the UI
renders (``session`` / ``text`` / ``tool`` / ``result`` / ``error``) — we do not
leak the raw event firehose to the client.

Subprocesses use ``asyncio.create_subprocess_exec`` (list argv, no shell): the
prompt and ids are argv elements, never interpreted by a shell.
"""
import asyncio
import json
import os
from subprocess import PIPE
from typing import AsyncIterator

from . import config

# Sessions we've already opened (so the next turn resumes instead of re-creating).
_started: set[str] = set()


def _turn_argv(session_id: str, prompt: str, resume: bool, model: str) -> list[str]:
    """Build the argv list for one ``claude -p`` chat turn.

    Args:
        session_id: Id passed to ``--resume`` or ``--session-id``.
        prompt: The turn's prompt; always the final argv element.
        resume: True to continue an existing session (``--resume``), False to
            open a brand-new one with that id (``--session-id``).
        model: Value passed to ``--model``.

    Returns:
        The complete argument vector, program name first.
    """
    # Exactly one of the two session flags is used per turn.
    session_flag = "--resume" if resume else "--session-id"
    # NOTE(review): the prompt is a bare positional; a prompt beginning with
    # "-" may be parsed as an option by the CLI — confirm whether a "--"
    # separator is supported before adding one.
    return [
        "claude",
        "-p",
        "--agent",
        config.BREAKGLASS_AGENT,
        "--dangerously-skip-permissions",
        "--output-format",
        "stream-json",
        "--verbose",  # required for stream-json output
        "--model",
        model,
        session_flag,
        session_id,
        prompt,
    ]


def translate_event(obj: dict) -> dict | None:
    """Map one raw stream-json event to a UI event, or None to drop it.

    Pure function — the unit tests pin this contract. Keeps the noisy
    hook/thinking-token/system chatter off the wire and exposes only what an
    operator watching a recovery needs: which session, assistant prose, which
    tools ran, and the final result.
    """
    etype = obj.get("type")

    if etype == "system":
        if obj.get("subtype") == "init":
            return {"kind": "session", "session_id": obj.get("session_id", "")}
        return None  # hook_started/hook_response/thinking_tokens/etc. — noise

    if etype == "assistant":
        events: list[dict] = []
        for block in obj.get("message", {}).get("content", []) or []:
            btype = block.get("type")
            if btype == "text" and block.get("text"):
                events.append({"kind": "text", "text": block["text"]})
            elif btype == "tool_use":
                events.append({
                    "kind": "tool",
                    "name": block.get("name", ""),
                    "input": block.get("input", {}),
                })
        if not events:
            return None
        # The server flattens a "batch" into individual SSE frames.
        return events[0] if len(events) == 1 else {"kind": "batch", "events": events}

    if etype == "result":
        return {
            "kind": "result",
            "is_error": bool(obj.get("is_error")),
            "result": obj.get("result", ""),
            "duration_ms": obj.get("duration_ms"),
        }

    return None


async def run_turn(
    session_id: str, prompt: str, model: str | None = None
) -> AsyncIterator[dict]:
    """Run one chat turn, yielding translated UI events as they arrive.

    Spawns the CLI in the session's workspace directory (created on demand),
    resuming the session if this process has already opened it, and streams
    the child's stdout line by line through ``translate_event``.

    The child is always reaped: if the consumer stops iterating early (the
    generator is closed or the task is cancelled) the subprocess is killed
    rather than left running unattended.

    Args:
        session_id: Session identifier; also used as the workspace directory
            name under ``config.SESSIONS_DIR``.
        prompt: The message for this turn (a single argv element).
        model: Model name; falls back to ``config.DEFAULT_MODEL`` when falsy.

    Yields:
        UI event dicts as produced by ``translate_event`` ("batch" events are
        flattened into their members), plus a final ``{"kind": "error", ...}``
        if the turn times out or the CLI exits non-zero.
    """
    resume = session_id in _started
    model = model or config.DEFAULT_MODEL
    # NOTE(review): session_id becomes a path component here; presumably the
    # caller only passes server-generated ids — confirm.
    workspace = os.path.join(config.SESSIONS_DIR, session_id)
    os.makedirs(workspace, exist_ok=True)

    argv = _turn_argv(session_id, prompt, resume, model)
    proc = await asyncio.create_subprocess_exec(
        *argv, cwd=workspace, stdout=PIPE, stderr=PIPE,
        # The default StreamReader limit is 64 KiB per line; a single
        # stream-json event can be far longer, and an over-long line makes
        # line iteration raise ValueError mid-turn.
        limit=16 * 1024 * 1024,
    )
    _started.add(session_id)
    assert proc.stdout is not None and proc.stderr is not None

    # Drain stderr concurrently: if nobody reads the pipe and the child fills
    # it, the child blocks on write and the turn hangs until the timeout.
    stderr_task = asyncio.ensure_future(proc.stderr.read())

    def _kill_if_running() -> None:
        # kill() raises ProcessLookupError once the child has been reaped.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass

    async def _pump() -> AsyncIterator[dict]:
        async for raw in proc.stdout:
            line = raw.decode(errors="replace").strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue  # non-JSON chatter on stdout
            if not isinstance(obj, dict):
                continue  # valid JSON but not an event object
            ev = translate_event(obj)
            if ev is None:
                continue
            if ev.get("kind") == "batch":
                for sub in ev["events"]:
                    yield sub
            else:
                yield ev

    try:
        try:
            async for ev in _with_timeout(_pump(), config.TURN_TIMEOUT_SECONDS):
                yield ev
        except asyncio.TimeoutError:
            _kill_if_running()
            await proc.wait()
            yield {"kind": "error", "error": f"turn timed out after {config.TURN_TIMEOUT_SECONDS}s"}
            return

        await proc.wait()
        if proc.returncode not in (0, None):
            err = (await stderr_task).decode(errors="replace")
            yield {"kind": "error", "error": err.strip()[:500] or f"exit {proc.returncode}"}
    finally:
        # Reached on normal completion, on errors, and when the consumer
        # closes/cancels the generator mid-stream. Only awaits here — an
        # async generator must not yield while being closed.
        if proc.returncode is None:
            _kill_if_running()
            await proc.wait()
        if not stderr_task.done():
            stderr_task.cancel()


async def _with_timeout(agen: AsyncIterator[dict], timeout: float) -> AsyncIterator[dict]:
    """Yield from an async generator but raise TimeoutError if the WHOLE turn
    exceeds ``timeout`` seconds (a wedged agent shouldn't stream forever).

    The deadline is fixed once, when iteration starts; each wait for the next
    item is given only the time still remaining, so the budget covers the
    entire stream rather than each item.

    Args:
        agen: The async iterator to relay items from.
        timeout: Total budget in seconds for the whole iteration.

    Yields:
        Each item produced by ``agen``, unchanged.

    Raises:
        asyncio.TimeoutError: When the deadline passes before ``agen`` is
            exhausted.
    """
    # We are always inside a running loop here; get_event_loop() is
    # deprecated for this use.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    it = agen.__aiter__()
    try:
        while True:
            remaining = deadline - loop.time()
            if remaining <= 0:
                raise asyncio.TimeoutError
            try:
                item = await asyncio.wait_for(it.__anext__(), timeout=remaining)
            except StopAsyncIteration:
                return
            # Yield outside the try so only the source's own exhaustion is
            # treated as end-of-stream.
            yield item
    finally:
        # Close the source deterministically on timeout, error, or when our
        # consumer stops early, instead of leaving it to garbage collection.
        # Plain async iterators may not have aclose(), hence the getattr.
        aclose = getattr(it, "aclose", None)
        if aclose is not None:
            await aclose()
breakglass: in-cluster emergency-recovery UI for the devvm Viktor wanted a web UI on the claude service to act as his breakglass when the devvm is down: open it, have Claude SSH in to diagnose/repair, and power-cycle the VM via the Proxmox host if needed. This is the app half (the infra stack + host bootstrap live in the infra repo). New, ISOLATED ASGI app under app/breakglass/ (never imports app.main, so the untrusted-input agents — recruiter-triage, nextcloud-todos — can't share a process with the root-on-devvm / PVE-reset SSH key): - pve.py: the LLM-independent power-verb path (status\|forensics\|reset\|stop\| start\|cycle on VM 102), whitelist-validated client-side, executed over the forced-command SSH key (list argv, no shell). - agent_session.py: multi-turn streamed chat — claude -p --session-id / --resume with --output-format stream-json, translated to a small SSE vocabulary (session/text/tool/result/error/done). - auth.py: edge Authentik header OR bearer; fail-closed. - server.py: FastAPI (session/chat-SSE/pve-verb routes) + serves the Svelte UI. - Svelte SPA (frontend/, built into app/breakglass/static/ and committed — no in-cluster build, per ADR-0002): streamed chat + danger-styled manual VM controls with confirm-on-mutate. - agents/breakglass.md: narrow tools (Bash/Read/Grep/Glob, no web), taught the ssh devvm / ssh pve aliases and cycle-vs-reset. - docker-entrypoint-breakglass.sh: ssh-agent bootstrap from the mounted key + ssh aliases, then uvicorn app.breakglass.server. The breakglass Deployment overrides the image CMD with this; the existing service is untouched. 26 new tests (verb whitelist incl. injection attempts, stream-json→SSE translation, auth gating, route behaviour); full suite 58 green. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> 2026-06-12 21:36:05 +00:00			`"""Drive the breakglass Claude agent and stream its work to the browser.`

			Each chat turn runs ``claude -p --output-format stream-json`` in the session's
			persistent workspace; the first turn opens the session with ``--session-id`` and
			later turns ``--resume`` it, so the conversation has memory across turns. The
			`CLI's JSON events are translated to a small, stable SSE vocabulary the UI`
			renders (``session`` / ``text`` / ``tool`` / ``result`` / ``error``) — we do not
			`leak the raw event firehose to the client.`

			Subprocesses use ``asyncio.create_subprocess_exec`` (list argv, no shell): the
			`prompt and ids are argv elements, never interpreted by a shell.`
			`"""`
			`import asyncio`
			`import json`
			`import os`
			`from subprocess import PIPE`
			`from typing import AsyncIterator`

			`from . import config`

			`# Sessions we've already opened (so the next turn resumes instead of re-creating).`
			`_started: set[str] = set()`


			`def _turn_argv(session_id: str, prompt: str, resume: bool, model: str) -> list[str]:`
			`argv = [`
			`"claude", "-p",`
			`"--agent", config.BREAKGLASS_AGENT,`
			`"--dangerously-skip-permissions",`
			`"--output-format", "stream-json",`
			`"--verbose", # required for stream-json output`
			`"--model", model,`
			`]`
			`# --session-id opens a brand-new session with that id; --resume continues it.`
			`argv += (["--resume", session_id] if resume else ["--session-id", session_id])`
			`argv.append(prompt)`
			`return argv`


			`def translate_event(obj: dict) -> dict \| None:`
			`"""Map one raw stream-json event to a UI event, or None to drop it.`

			`Pure function — the unit tests pin this contract. Keeps the noisy`
			`hook/thinking-token/system chatter off the wire and exposes only what an`
			`operator watching a recovery needs: which session, assistant prose, which`
			`tools ran, and the final result.`
			`"""`
			`etype = obj.get("type")`

			`if etype == "system":`
			`if obj.get("subtype") == "init":`
			`return {"kind": "session", "session_id": obj.get("session_id", "")}`
			`return None # hook_started/hook_response/thinking_tokens/etc. — noise`

			`if etype == "assistant":`
			`events: list[dict] = []`
			`for block in obj.get("message", {}).get("content", []) or []:`
			`btype = block.get("type")`
			`if btype == "text" and block.get("text"):`
			`events.append({"kind": "text", "text": block["text"]})`
			`elif btype == "tool_use":`
			`events.append({`
			`"kind": "tool",`
			`"name": block.get("name", ""),`
			`"input": block.get("input", {}),`
			`})`
			`if not events:`
			`return None`
			`# The server flattens a "batch" into individual SSE frames.`
			`return events[0] if len(events) == 1 else {"kind": "batch", "events": events}`

			`if etype == "result":`
			`return {`
			`"kind": "result",`
			`"is_error": bool(obj.get("is_error")),`
			`"result": obj.get("result", ""),`
			`"duration_ms": obj.get("duration_ms"),`
			`}`

			`return None`


			`async def run_turn(`
			`session_id: str, prompt: str, model: str \| None = None`
			`) -> AsyncIterator[dict]:`
			`"""Run one chat turn, yielding translated UI events as they arrive."""`
			`resume = session_id in _started`
			`model = model or config.DEFAULT_MODEL`
			`workspace = os.path.join(config.SESSIONS_DIR, session_id)`
			`os.makedirs(workspace, exist_ok=True)`

			`argv = _turn_argv(session_id, prompt, resume, model)`
			`proc = await asyncio.create_subprocess_exec(`
			`*argv, cwd=workspace, stdout=PIPE, stderr=PIPE,`
			`)`
			`_started.add(session_id)`
			`assert proc.stdout is not None and proc.stderr is not None`

			`try:`
			`async def _pump() -> AsyncIterator[dict]:`
			`async for raw in proc.stdout:`
			`line = raw.decode(errors="replace").strip()`
			`if not line:`
			`continue`
			`try:`
			`obj = json.loads(line)`
			`except json.JSONDecodeError:`
			`continue`
			`ev = translate_event(obj)`
			`if ev is None:`
			`continue`
			`if ev.get("kind") == "batch":`
			`for sub in ev["events"]:`
			`yield sub`
			`else:`
			`yield ev`

			`async for ev in _with_timeout(_pump(), config.TURN_TIMEOUT_SECONDS):`
			`yield ev`
			`except asyncio.TimeoutError:`
			`proc.kill()`
			`await proc.wait()`
			`yield {"kind": "error", "error": f"turn timed out after {config.TURN_TIMEOUT_SECONDS}s"}`
			`return`

			`await proc.wait()`
			`if proc.returncode not in (0, None):`
			`err = (await proc.stderr.read()).decode(errors="replace")`
			`yield {"kind": "error", "error": err.strip()[:500] or f"exit {proc.returncode}"}`


			`async def _with_timeout(agen: AsyncIterator[dict], timeout: float) -> AsyncIterator[dict]:`
			`"""Yield from an async generator but raise TimeoutError if the WHOLE turn`
			exceeds ``timeout`` seconds (a wedged agent shouldn't stream forever)."""
			`loop = asyncio.get_event_loop()`
			`deadline = loop.time() + timeout`
			`it = agen.__aiter__()`
			`while True:`
			`remaining = deadline - loop.time()`
			`if remaining <= 0:`
			`raise asyncio.TimeoutError`
			`try:`
			`yield await asyncio.wait_for(it.__anext__(), timeout=remaining)`
			`except StopAsyncIteration:`
			`return`