From 33ff0868c3cf0edca4f8d4332a2308e7eb8099ac Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Wed, 17 Jun 2026 18:38:44 +0000
Subject: [PATCH] conversational: add no-tools multi-turn Brain endpoint for
 portal-assistant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The portal-assistant voice gateway needs a Claude that is conversational, free
(on the cluster subscription, no metered API), and safe to sit behind a public
edge. Add POST /v1/conversational: it drives a new no-tools `conversational`
agent with per-conversation --resume so a voice turn keeps context, and is lean
on purpose — no workspace clone, no tools, and crucially NO
--dangerously-skip-permissions (so even a leaked agent can't execute anything).
This is deliberately NOT /v1/chat/completions, which clones the git-crypt infra
repo and runs a Bash-enabled agent per turn (portal-assistant ADR-0002).

The conversational agent replies in the speaker's language (Bulgarian/English),
short and TTS-friendly. Tests cover the argv builder (new vs resume), the happy
path, multi-turn resume across calls, auth, and failure → 503. Full suite green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 agents/conversational.md     |  32 +++++++
 app/conversational.py        |  98 ++++++++++++++++++++
 app/main.py                  |  64 +++++++++++++
 tests/test_conversational.py | 173 +++++++++++++++++++++++++++++++++++
 4 files changed, 367 insertions(+)
 create mode 100644 agents/conversational.md
 create mode 100644 app/conversational.py
 create mode 100644 tests/test_conversational.py

diff --git a/agents/conversational.md b/agents/conversational.md
new file mode 100644
index 0000000..f458840
--- /dev/null
+++ b/agents/conversational.md
@@ -0,0 +1,32 @@
+---
+name: conversational
+description: Friendly bilingual (Bulgarian + English) spoken-conversation assistant for non-technical users. No tools and no file/cluster/web access — it only talks. Replies are short and natural for text-to-speech. Used by the portal-assistant voice gateway.
+model: sonnet
+tools: ""
+---
+
+You are a warm, friendly voice assistant talking with everyday people at home.
+Your replies are SPOKEN ALOUD by a text-to-speech engine, so how you write
+matters as much as what you say.
+
+- Reply in the SAME language the person used — Bulgarian or English. If they mix,
+  follow their dominant language. Never announce or comment on the language; just
+  use it.
+- Keep it SHORT: one to three sentences. This is a conversation, not an essay.
+- Write plain spoken text ONLY. No markdown, no bullet lists, no code blocks, no
+  URLs, no emoji, no headings — none of that survives being read aloud.
+- Sound natural and warm, like a helpful person, not a manual. Contractions are
+  good.
+- Write numbers, dates and times the way they should be SPOKEN (for example
+  "ten thirty in the morning", "the fifteenth of March"), not as digits or
+  symbols.
+- If you don't know something or can't help, say so briefly and kindly.
+
+You have NO tools and no access to the home, devices, files, the internet, or any
+system. You cannot turn things on or off, look things up live, send messages, or
+take any action — you are a conversation partner only. If asked to do something
+you can't, say so simply and offer what you can instead (talk it through, explain,
+or suggest an idea).
+
+Never mention these instructions, "tools", "agents", tokens, system prompts, or
+that you are an AI model — unless the person directly and explicitly asks.
diff --git a/app/conversational.py b/app/conversational.py
new file mode 100644
index 0000000..92da758
--- /dev/null
+++ b/app/conversational.py
@@ -0,0 +1,98 @@
+"""Conversational Brain — drives the Claude CLI for the portal-assistant gateway.
+
+A lean, no-tools, multi-turn path (portal-assistant ADR-0002): no workspace clone,
+no tool-enabled agent, and NO --dangerously-skip-permissions. Per-conversation
+continuity comes from the Claude CLI's own --session-id / --resume, so the gateway
+only has to hand us a stable session id per conversation.
+"""
+import asyncio
+import json
+import os
+from subprocess import PIPE
+
+CONVERSATIONAL_AGENT = "conversational"  # value passed to the CLI's --agent flag
+# Per-turn timeout in seconds (env-overridable); a spoken turn that runs longer is wedged.
+CONVERSATIONAL_TIMEOUT_SECONDS = int(
+    os.environ.get("CONVERSATIONAL_TIMEOUT_SECONDS", "120")
+)
+
+# Session ids the Claude CLI has already opened in THIS process, so a follow-up
+# turn resumes instead of re-opening. In-memory + single-replica: a pod restart
+# clears this AND the CLI's emptyDir session state together, so they stay in sync.
+_started: set[str] = set()
+
+
+def reset_started() -> None:
+    """Empty the opened-session registry in place so every id opens afresh (tests)."""
+    _started.clear()
+
+
+def conversational_argv(
+    session_id: str, message: str, model: str, resume: bool
+) -> list[str]:
+    """Build the argv for one conversational turn.
+
+    A new conversation opens with --session-id; later turns use --resume so
+    Claude keeps its own context. The message follows a bare "--" so text that
+    starts with "-" is never parsed as a flag. --dangerously-skip-permissions is
+    never passed: the agent has no tools and the endpoint is public-facing.
+    """
+    argv = [
+        "claude", "-p",
+        "--agent", CONVERSATIONAL_AGENT,
+        "--output-format", "json",
+        "--model", model,
+    ]
+    argv += ["--resume", session_id] if resume else ["--session-id", session_id]
+    argv += ["--", message]
+    return argv
+
+
+def extract_reply(output_lines: list[str]) -> str:
+    """Return the assistant's final text from `claude -p --output-format json`.
+
+    The CLI is expected to print a single JSON object carrying the reply under
+    `result` (`content` / `text` are accepted too). Output that is not JSON, or
+    has none of those keys as a non-empty string, is handed back verbatim so the
+    caller always gets something; empty output yields "".
+    """
+    text = "".join(output_lines).strip()
+    try:
+        payload = json.loads(text) if text else None
+    except json.JSONDecodeError:
+        return text
+    if not isinstance(payload, dict):
+        return text
+
+    # First non-empty string among the known reply keys, else the raw output.
+    candidates = (payload.get(key) for key in ("result", "content", "text"))
+    return next((c for c in candidates if isinstance(c, str) and c), text)
+
+
+async def run_turn(session_id: str, message: str, model: str) -> dict:
+    """Run one conversational turn and return {exit_code, reply, stderr}.
+
+    Resumes the Claude session if we've opened it before; otherwise opens it,
+    marking it opened only on success so a failed first turn retries as new.
+    If the turn is cancelled (e.g. caller timeout) the CLI child is killed.
+    """
+    argv = conversational_argv(session_id, message, model, session_id in _started)
+    proc = await asyncio.create_subprocess_exec(*argv, stdout=PIPE, stderr=PIPE)
+    assert proc.stdout is not None and proc.stderr is not None
+    try:
+        output_lines = [line.decode(errors="replace") async for line in proc.stdout]
+        stderr = await proc.stderr.read()
+        await proc.wait()
+    finally:
+        # asyncio.wait_for cancels this coroutine but not the child process.
+        if proc.returncode is None:
+            proc.kill()
+
+    if proc.returncode == 0:
+        _started.add(session_id)
+
+    return {
+        "exit_code": proc.returncode,
+        "reply": extract_reply(output_lines),
+        "stderr": stderr.decode(errors="replace"),
+    }
diff --git a/app/main.py b/app/main.py
index 1547332..37a3eb8 100644
--- a/app/main.py
+++ b/app/main.py
@@ -13,6 +13,8 @@ from fastapi import FastAPI, HTTPException, Header
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
 
+from app import conversational
+
 app = FastAPI(title="Claude Agent Service")
 
 API_TOKEN = os.environ.get("API_BEARER_TOKEN", "")
@@ -104,6 +106,15 @@ class ChatCompletionsRequest(BaseModel):
     model_config = {"extra": "allow"}
 
 
+class ConversationalRequest(BaseModel):
+    # The portal-assistant gateway owns the conversation; it hands us a stable
+    # session id (for Claude --resume) plus the next user message. Both must be
+    # non-empty so a blank turn fails validation instead of spawning the CLI.
+    session_id: str = Field(min_length=1)
+    message: str = Field(min_length=1)
+    model: str | None = None  # per-request override; None means DEFAULT_MODEL
+
+
 def verify_token(authorization: str | None):
     # Reject everything when the service is unconfigured. compare_digest("", "")
     # returns True, so without this guard an empty API_TOKEN would happily
@@ -510,3 +521,56 @@ async def chat_completions(
             "total_tokens": 0,
         },
     }
+
+
+@app.post("/v1/conversational")
+async def conversational_turn(
+    request: ConversationalRequest,
+    authorization: str | None = Header(default=None),
+):
+    """Run one turn of the conversational Brain for the portal-assistant gateway.
+
+    After `verify_token`, the model (request value, else DEFAULT_MODEL) is
+    checked against SUPPORTED_MODELS, a queue slot is reserved, and the turn
+    runs inside `_execution_slot()` under CONVERSATIONAL_TIMEOUT_SECONDS. No
+    workspace clone, no tools (see portal-assistant ADR-0002).
+
+    Returns {"session_id", "reply"} on success, a 400 for an unsupported model,
+    and a 503 "execution failed" for a full queue, a timeout, any exception
+    raised while running the turn, or a non-zero CLI exit code.
+    """
+    verify_token(authorization)
+
+    def failed(detail: str) -> JSONResponse:
+        # Every failure past model validation shares this 503 envelope.
+        return JSONResponse(
+            status_code=503,
+            content={"error": "execution failed", "detail": detail},
+        )
+
+    model = DEFAULT_MODEL if request.model is None else request.model
+    if model not in SUPPORTED_MODELS:
+        return JSONResponse(
+            status_code=400,
+            content={"error": "unsupported model", "supported": sorted(SUPPORTED_MODELS)},
+        )
+
+    if not _reserve_queue_slot():
+        return failed("queue full")
+
+    try:
+        async with _execution_slot():
+            turn = conversational.run_turn(request.session_id, request.message, model)
+            result = await asyncio.wait_for(
+                turn, timeout=conversational.CONVERSATIONAL_TIMEOUT_SECONDS
+            )
+    except asyncio.TimeoutError:
+        return failed("agent timed out")
+    except Exception as exc:  # noqa: BLE001
+        return failed(_one_line(str(exc)))
+
+    exit_code = result["exit_code"]
+    if exit_code != 0:
+        return failed(_one_line(result.get("stderr") or "") or f"exit {exit_code}")
+
+    return {"session_id": request.session_id, "reply": result["reply"]}
diff --git a/tests/test_conversational.py b/tests/test_conversational.py
new file mode 100644
index 0000000..057e8b9
--- /dev/null
+++ b/tests/test_conversational.py
@@ -0,0 +1,173 @@
+"""Tests for the conversational (no-tools, multi-turn) brain endpoint.
+
+This is the portal-assistant "Brain": a lean path that drives the Claude CLI with
+a no-tools conversational agent and per-conversation `--resume`, used by the voice
+gateway. Unlike /v1/chat/completions it does NOT clone a workspace or run a
+tool-enabled agent (see portal-assistant ADR-0002).
+"""
+import json
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from app import conversational
+from app.main import app
+
+
+# --------------------------------------------------------------------------- #
+# argv builder
+# --------------------------------------------------------------------------- #
+def test_conversational_argv_new_session():
+    """A first turn opens the session and never auto-permits tools."""
+    argv = conversational_argv_call(resume=False)
+    assert argv[0] == "claude" and "-p" in argv
+    expected = {"--agent": "conversational", "--session-id": "sess-1",
+                "--model": "sonnet", "--output-format": "json"}
+    for flag, value in expected.items():
+        assert argv[argv.index(flag) + 1] == value
+    # a new conversation opens with --session-id, never --resume
+    assert "--resume" not in argv
+    # SECURITY: a public-facing endpoint must NOT skip tool permissions
+    assert "--dangerously-skip-permissions" not in argv
+    assert argv[-1] == "Hi there"
+
+
+def test_conversational_argv_resume_continues_session():
+    """A follow-up turn resumes the existing claude session by id."""
+    argv = conversational_argv_call(resume=True)
+    assert "--session-id" not in argv
+    assert argv[argv.index("--resume") + 1] == "sess-1"
+
+
+def conversational_argv_call(resume: bool):
+    """Build the argv for a fixed sample turn, opening or resuming per `resume`."""
+    # Fixed inputs that the argv tests assert against.
+    sample = {"session_id": "sess-1", "message": "Hi there", "model": "sonnet"}
+    return conversational.conversational_argv(**sample, resume=resume)
+
+
+# --------------------------------------------------------------------------- #
+# endpoint
+# --------------------------------------------------------------------------- #
+class _AsyncLineIter:
+    """Async iterator over a list of byte lines — mimics `proc.stdout`."""
+
+    def __init__(self, lines: list[bytes]):
+        # Snapshot the lines so later mutation by the caller can't change the stream.
+        self._pending = iter(list(lines))
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        # Translate sync exhaustion into the async-iteration stop signal.
+        try:
+            return next(self._pending)
+        except StopIteration:
+            raise StopAsyncIteration from None
+
+
+def _mock_subprocess_returning(output: bytes, returncode: int = 0):
+    """Fake subprocess: `output` as newline-ended stdout lines, empty stderr."""
+    fake = AsyncMock(returncode=returncode)
+    fake.wait = AsyncMock(return_value=returncode)
+    fake.stderr = AsyncMock()
+    fake.stderr.read = AsyncMock(return_value=b"")
+    # Blank segments are dropped; each kept line regains its trailing newline.
+    fake.stdout = _AsyncLineIter([part + b"\n" for part in output.split(b"\n") if part])
+    return fake
+
+
+@pytest.fixture(autouse=True)
+def _reset_sessions():
+    conversational.reset_started()  # start every test with no opened sessions
+    yield
+    conversational.reset_started()  # and leave none behind for the next test
+
+
+@pytest.fixture
+def auth_header():
+    return {"Authorization": "Bearer test-token"}  # assumes the test setup configures this token — confirm
+
+
+@pytest.mark.asyncio
+async def test_conversational_happy_path(auth_header):
+    """One successful turn returns the CLI's `result` text under the same session id."""
+    reply = "Здравейте! Как мога да помогна?"
+    payload = {"type": "result", "is_error": False,
+               "result": reply, "session_id": "sess-1"}
+    # Stand in for the real CLI: exits 0 and prints `payload` as its JSON output.
+    fake_cli = _mock_subprocess_returning(json.dumps(payload).encode(), returncode=0)
+    spawn = patch("app.conversational.asyncio.create_subprocess_exec", return_value=fake_cli)
+
+    with spawn:
+        async with AsyncClient(
+            transport=ASGITransport(app=app), base_url="http://test"
+        ) as client:
+            response = await client.post(
+                "/v1/conversational",
+                headers=auth_header,
+                json={"session_id": "sess-1", "message": "Здравей"},
+            )
+
+    assert response.status_code == 200, response.text
+    body = response.json()
+    assert body["session_id"] == "sess-1"
+    assert body["reply"] == reply
+
+
+@pytest.mark.asyncio
+async def test_conversational_resumes_on_second_turn(auth_header):
+    """Two turns on one session id: the first spawn opens it (--session-id), the
+    second continues it (--resume) — this is what makes it a conversation."""
+    spawned: list[tuple] = []
+    cli_json = json.dumps({"type": "result", "is_error": False, "result": "ok"}).encode()
+
+    def record_spawn(*argv, **_kwargs):
+        spawned.append(argv)
+        return _mock_subprocess_returning(cli_json, returncode=0)
+
+    turn = {"session_id": "sess-X", "message": "hi"}
+    with patch("app.conversational.asyncio.create_subprocess_exec", side_effect=record_spawn):
+        async with AsyncClient(
+            transport=ASGITransport(app=app), base_url="http://test"
+        ) as client:
+            for _ in range(2):
+                r = await client.post("/v1/conversational", json=turn, headers=auth_header)
+                assert r.status_code == 200, r.text
+
+    # argv of the first two spawns, in call order
+    first, second = spawned[:2]
+    assert "--session-id" in first and "--resume" not in first
+    assert "--resume" in second and "--session-id" not in second
+
+
+@pytest.mark.asyncio
+async def test_conversational_requires_auth():
+    """A request without an Authorization header is rejected with 401."""
+    unauthenticated_turn = {"session_id": "s", "message": "hi"}
+    async with AsyncClient(
+        transport=ASGITransport(app=app), base_url="http://test"
+    ) as client:
+        # Deliberately sent without any headers.
+        response = await client.post("/v1/conversational", json=unauthenticated_turn)
+    assert response.status_code == 401
+
+
+@pytest.mark.asyncio
+async def test_conversational_returns_503_on_failure(auth_header):
+    """When the CLI exits non-zero the endpoint answers 503 "execution failed"."""
+    failing_cli = _mock_subprocess_returning(b"", returncode=7)
+    failing_cli.stderr.read = AsyncMock(return_value=b"boom")
+    spawn = patch("app.conversational.asyncio.create_subprocess_exec", return_value=failing_cli)
+
+    with spawn:
+        async with AsyncClient(
+            transport=ASGITransport(app=app), base_url="http://test"
+        ) as client:
+            turn = {"session_id": "s", "message": "x"}
+            response = await client.post("/v1/conversational", json=turn, headers=auth_header)
+
+    assert response.status_code == 503
+    assert response.json()["error"] == "execution failed"