"""FastAPI application that accepts document webhooks and processes them in the background.

The ``/webhook`` endpoint authenticates the caller with a bearer token and puts
the received document id on an in-process ``asyncio.Queue``.  A single worker
task, started in :func:`lifespan`, drains the queue and hands each id to the
processor function.
"""

import asyncio
import contextlib
import hmac
import logging
import os
from collections.abc import AsyncIterator, Awaitable, Callable
from contextlib import asynccontextmanager
from typing import Any

from fastapi import FastAPI, Header, HTTPException, status
from prometheus_fastapi_instrumentator import Instrumentator
from sqlalchemy.ext.asyncio import async_sessionmaker

from payslip_ingest.db import create_engine_from_env, make_session_factory
from payslip_ingest.extractor import ClaudeExtractor
from payslip_ingest.paperless import PaperlessClient
from payslip_ingest.processor import process_document
from payslip_ingest.schema import WebhookPayload

log = logging.getLogger(__name__)

# Environment variables that must be set (and non-empty) before the app starts.
REQUIRED_ENV = [
    "PAPERLESS_URL",
    "PAPERLESS_API_TOKEN",
    "CLAUDE_AGENT_URL",
    "CLAUDE_AGENT_BEARER_TOKEN",
    "DB_CONNECTION_STRING",
    "WEBHOOK_BEARER_TOKEN",
]

# Type alias for the processor function — makes monkeypatching in tests explicit.
ProcessorFn = Callable[
    [int, async_sessionmaker[Any], PaperlessClient, ClaudeExtractor],
    Awaitable[Any],
]


def _verify_env() -> None:
    """Fail fast when any variable in ``REQUIRED_ENV`` is unset or empty.

    Raises:
        RuntimeError: naming every missing variable in a single message.
    """
    missing = [k for k in REQUIRED_ENV if not os.environ.get(k)]
    if missing:
        raise RuntimeError(f"Missing required env vars: {', '.join(missing)}")


def _verify_bearer(authorization: str | None, expected: str) -> None:
    """Validate an ``Authorization: Bearer <token>`` header value.

    Args:
        authorization: Raw header value, or ``None`` when the header is absent.
        expected: The token the caller must present.  An empty value means the
            service has no token configured and every request is rejected.

    Raises:
        HTTPException: 401 when no token is configured, when the header is
            missing or not a bearer credential, or when the token does not match.
    """
    if not expected:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Service unauthenticated",
        )
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing bearer token",
        )
    token = authorization.removeprefix("Bearer ")
    # compare_digest() raises TypeError for str arguments containing non-ASCII
    # characters, which would turn a malformed header into a 500.  Comparing
    # the encoded bytes keeps the constant-time check and yields a clean 401.
    if not hmac.compare_digest(token.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token",
        )


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Build shared resources, run the queue worker, and tear everything down.

    On startup: checks the environment, creates the DB engine and session
    factory, the Paperless client and the extractor, then starts one worker
    task that consumes document ids from an ``asyncio.Queue``.  The queue and
    the shared objects are published on ``app.state``.

    On shutdown: cancels the worker and closes the clients and the engine.

    Raises:
        RuntimeError: if a required environment variable is missing.
    """
    _verify_env()

    engine = create_engine_from_env()
    session_factory = make_session_factory(engine)
    paperless = PaperlessClient(
        base_url=os.environ["PAPERLESS_URL"],
        api_token=os.environ["PAPERLESS_API_TOKEN"],
    )
    extractor = ClaudeExtractor(
        base_url=os.environ["CLAUDE_AGENT_URL"],
        bearer_token=os.environ["CLAUDE_AGENT_BEARER_TOKEN"],
    )

    queue: asyncio.Queue[int] = asyncio.Queue()
    # Use normal attribute access so an override assigned as
    # ``app.state.processor_fn = ...`` is honoured; reading ``__dict__``
    # directly bypasses the state object's own attribute storage.
    processor: ProcessorFn = getattr(app.state, "processor_fn", process_document)

    async def worker() -> None:
        """Process queued document ids one at a time, forever."""
        while True:
            doc_id = await queue.get()
            try:
                await processor(doc_id, session_factory, paperless, extractor)
            except Exception:
                # One bad document must not kill the worker loop.
                log.exception("processing failed for doc_id=%s", doc_id)
            finally:
                queue.task_done()

    worker_task = asyncio.create_task(worker())

    app.state.queue = queue
    app.state.session_factory = session_factory
    app.state.paperless = paperless
    app.state.extractor = extractor

    try:
        yield
    finally:
        worker_task.cancel()
        with contextlib.suppress(asyncio.CancelledError):
            await worker_task
        # Nested try/finally: a failure while closing one resource must not
        # prevent the remaining ones from being released.
        try:
            await paperless.aclose()
        finally:
            try:
                await extractor.aclose()
            finally:
                await engine.dispose()


app = FastAPI(title="Payslip Ingest", lifespan=lifespan)
# Instrument the app and expose the collected metrics at /metrics.
Instrumentator().instrument(app).expose(app, endpoint="/metrics")


@app.post("/webhook", status_code=status.HTTP_202_ACCEPTED)
async def webhook(
    payload: WebhookPayload,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """Authenticate the caller and enqueue the document for background processing.

    Returns immediately with 202; the actual work happens in the worker task.

    Raises:
        HTTPException: 401 when the bearer token is missing or wrong.
    """
    _verify_bearer(authorization, os.environ.get("WEBHOOK_BEARER_TOKEN", ""))
    queue: asyncio.Queue[int] = app.state.queue
    await queue.put(payload.document_id)
    return {"status": "accepted", "document_id": payload.document_id}


@app.get("/healthz")
async def healthz() -> dict[str, Any]:
    """Report liveness together with the current queue depth.

    The depth is reported as 0 when the queue has not been placed on
    ``app.state`` yet.
    """
    queue: asyncio.Queue[int] | None = getattr(app.state, "queue", None)
    depth = queue.qsize() if queue is not None else 0
    return {"status": "ok", "queue_depth": depth}