conversational: add no-tools multi-turn Brain endpoint for portal-assistant
The portal-assistant voice gateway needs a Claude that is conversational, free (on the cluster subscription, no metered API), and safe to sit behind a public edge. Add POST /v1/conversational: it drives a new no-tools `conversational` agent with per-conversation --resume so a voice turn keeps context, and is lean on purpose — no workspace clone, no tools, and crucially NO --dangerously-skip-permissions (so even a leaked agent can't execute anything). This is deliberately NOT /v1/chat/completions, which clones the git-crypt infra repo and runs a Bash-enabled agent per turn (portal-assistant ADR-0002). The conversational agent replies in the speaker's language (Bulgarian/English), short and TTS-friendly. Tests cover the argv builder (new vs resume), the happy path, multi-turn resume across calls, auth, and failure → 503. Full suite green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
e34640cc47
commit
33ff0868c3
4 changed files with 367 additions and 0 deletions
98
app/conversational.py
Normal file
98
app/conversational.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
"""Conversational Brain — drives the Claude CLI for the portal-assistant gateway.
|
||||
|
||||
A lean, no-tools, multi-turn path (portal-assistant ADR-0002): no workspace clone,
|
||||
no tool-enabled agent, and NO --dangerously-skip-permissions. Per-conversation
|
||||
continuity comes from the Claude CLI's own --session-id / --resume, so the gateway
|
||||
only has to hand us a stable session id per conversation.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from subprocess import PIPE
|
||||
|
||||
# Agent name handed to the Claude CLI via --agent for every conversational turn.
CONVERSATIONAL_AGENT = "conversational"

# Upper bound, in seconds, for a single turn. Spoken chat turns are short, so
# anything still running after this long is treated as wedged. Overridable
# through the CONVERSATIONAL_TIMEOUT_SECONDS environment variable.
CONVERSATIONAL_TIMEOUT_SECONDS = int(os.getenv("CONVERSATIONAL_TIMEOUT_SECONDS", "120"))

# Session ids the Claude CLI has already opened in THIS process; membership
# decides whether a turn resumes a session or opens a new one. Deliberately
# in-memory and single-replica: a pod restart wipes this set together with the
# CLI's emptyDir session state, which keeps the two in sync.
_started: set[str] = set()
|
||||
|
||||
|
||||
def reset_started() -> None:
    """Empty the registry of opened sessions.

    Exists for tests: afterwards every session id is treated as new again, so
    the next turn for it is opened rather than resumed.
    """
    _started.clear()
|
||||
|
||||
|
||||
def conversational_argv(
    session_id: str, message: str, model: str, resume: bool
) -> list[str]:
    """Build the argv for one conversational turn.

    A new conversation opens the session with --session-id; subsequent turns
    continue it with --resume so Claude keeps its own context. We never pass
    --dangerously-skip-permissions: the conversational agent has no tools and the
    endpoint is public-facing, so nothing may be auto-permitted.

    Args:
        session_id: Conversation id passed to --session-id / --resume.
        message: The user's text for this turn. Untrusted input.
        model: Model name passed to --model.
        resume: True to continue an already-opened session, False to open it.

    Returns:
        The full argument vector, with the message as the final element.
    """
    session_flag = "--resume" if resume else "--session-id"
    return [
        "claude", "-p",
        "--agent", CONVERSATIONAL_AGENT,
        "--output-format", "json",
        "--model", model,
        session_flag, session_id,
        # End-of-options marker: the message is caller-controlled, and without
        # this a message beginning with "-" (e.g. "-5 degrees", or a literal
        # "--some-flag") would be parsed by the CLI as an option instead of
        # being treated as the prompt.
        "--",
        message,
    ]
|
||||
|
||||
|
||||
def extract_reply(output_lines: list[str]) -> str:
    """Return the assistant's text from `claude -p --output-format json` output.

    The captured lines are concatenated and stripped. The CLI is expected to
    emit a single JSON object carrying the final message under `result`;
    `content` and `text` are tried next, in that order. The first of those
    that holds a non-empty string wins.

    Whenever that fails — the output is not valid JSON, is not a JSON object,
    or has no usable key — the stripped raw output is returned unchanged so
    callers always get something. Blank output yields "".
    """
    text = "".join(output_lines).strip()
    if not text:
        return ""

    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        return text

    if not isinstance(payload, dict):
        return text

    candidates = (payload.get(field) for field in ("result", "content", "text"))
    return next(
        (found for found in candidates if isinstance(found, str) and found),
        text,
    )
|
||||
|
||||
|
||||
async def run_turn(session_id: str, message: str, model: str) -> dict:
    """Run one conversational turn and return {exit_code, reply, stderr}.

    Resumes the Claude session if we've opened it before; otherwise opens it.
    The session is only marked opened on success so a failed first turn can be
    retried cleanly as a new one.

    Args:
        session_id: Conversation id; also the key used to decide open vs resume.
        message: The user's text for this turn.
        model: Model name forwarded to the CLI.

    Returns:
        A dict with `exit_code` (the CLI's return code), `reply` (text pulled
        out of stdout by extract_reply) and `stderr` (decoded stderr).

    If the awaiting task is cancelled (for example by the caller's timeout),
    the child process is killed and reaped before the cancellation propagates.
    """
    resume = session_id in _started
    argv = conversational_argv(session_id, message, model, resume)

    proc = await asyncio.create_subprocess_exec(*argv, stdout=PIPE, stderr=PIPE)
    try:
        # communicate() drains stdout and stderr concurrently. Reading stdout
        # to EOF first and stderr afterwards can deadlock once the child fills
        # the stderr pipe, and line-wise iteration is subject to the stream
        # reader's per-line size limit (the JSON envelope is a single line).
        stdout, stderr = await proc.communicate()
    finally:
        # Still running here means we were interrupted (cancellation/timeout
        # or an unexpected error). Kill and reap the child so a wedged CLI
        # process does not outlive the request.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # the child exited between the check and the kill
            await proc.wait()

    if proc.returncode == 0:
        _started.add(session_id)

    return {
        "exit_code": proc.returncode,
        "reply": extract_reply([stdout.decode(errors="replace")]),
        "stderr": stderr.decode(errors="replace"),
    }
|
||||
64
app/main.py
64
app/main.py
|
|
@ -13,6 +13,8 @@ from fastapi import FastAPI, HTTPException, Header
|
|||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app import conversational
|
||||
|
||||
app = FastAPI(title="Claude Agent Service")
|
||||
|
||||
API_TOKEN = os.environ.get("API_BEARER_TOKEN", "")
|
||||
|
|
@ -104,6 +106,15 @@ class ChatCompletionsRequest(BaseModel):
|
|||
model_config = {"extra": "allow"}
|
||||
|
||||
|
||||
class ConversationalRequest(BaseModel):
    """Request body for POST /v1/conversational."""

    # The portal-assistant gateway owns the conversation; it hands us a stable
    # session id (for Claude --resume) plus the next user message. Both must be
    # non-empty: an empty value can only produce a failed CLI invocation, so it
    # is rejected at validation time instead of consuming an execution slot.
    session_id: str = Field(min_length=1)
    message: str = Field(min_length=1)
    # Model is selectable per request, same as the OpenAI-compat path; None
    # means "use the service default".
    model: str | None = None
|
||||
|
||||
|
||||
def verify_token(authorization: str | None):
|
||||
# Reject everything when the service is unconfigured. compare_digest("", "")
|
||||
# returns True, so without this guard an empty API_TOKEN would happily
|
||||
|
|
@ -510,3 +521,56 @@ async def chat_completions(
|
|||
"total_tokens": 0,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@app.post("/v1/conversational")
async def conversational_turn(
    request: ConversationalRequest,
    authorization: str | None = Header(default=None),
):
    """Handle one turn of the lean, multi-turn conversational Brain.

    Serves the portal-assistant gateway by driving a no-tools conversational
    agent with per-conversation --resume — no workspace clone, no tools (see
    portal-assistant ADR-0002).

    Responses:
        200: {"session_id", "reply"} — the reply text keyed to the caller's
            session id.
        400: the requested model is not in SUPPORTED_MODELS.
        503: queue full, turn timed out, the turn raised, or the CLI exited
            non-zero; the body carries a one-line `detail`.
    """
    verify_token(authorization)

    def failed(detail: str) -> JSONResponse:
        # Every execution failure shares the same 503 envelope.
        return JSONResponse(
            status_code=503,
            content={"error": "execution failed", "detail": detail},
        )

    model = DEFAULT_MODEL if request.model is None else request.model
    if model not in SUPPORTED_MODELS:
        return JSONResponse(
            status_code=400,
            content={"error": "unsupported model", "supported": sorted(SUPPORTED_MODELS)},
        )

    if not _reserve_queue_slot():
        return failed("queue full")

    try:
        async with _execution_slot():
            turn = conversational.run_turn(request.session_id, request.message, model)
            result = await asyncio.wait_for(
                turn, timeout=conversational.CONVERSATIONAL_TIMEOUT_SECONDS
            )
    except asyncio.TimeoutError:
        return failed("agent timed out")
    except Exception as exc:  # noqa: BLE001
        return failed(_one_line(str(exc)))

    exit_code = result["exit_code"]
    if exit_code != 0:
        # Prefer the CLI's stderr; fall back to the bare exit code when empty.
        return failed(_one_line(result.get("stderr") or "") or f"exit {exit_code}")

    return {"session_id": request.session_id, "reply": result["reply"]}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue