Extracted from /home/wizard/code monorepo into its own repo so Woodpecker CI can watch it. Identical content to /home/wizard/code commit e426028. See README.md for overview, env vars, and Paperless workflow config. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
118 lines
3.9 KiB
Python
118 lines
3.9 KiB
Python
import asyncio
|
|
import contextlib
|
|
import hmac
|
|
import logging
|
|
import os
|
|
from collections.abc import AsyncIterator, Awaitable, Callable
|
|
from contextlib import asynccontextmanager
|
|
from typing import Any
|
|
|
|
from fastapi import FastAPI, Header, HTTPException, status
|
|
from prometheus_fastapi_instrumentator import Instrumentator
|
|
from sqlalchemy.ext.asyncio import async_sessionmaker
|
|
|
|
from payslip_ingest.db import create_engine_from_env, make_session_factory
|
|
from payslip_ingest.extractor import ClaudeExtractor
|
|
from payslip_ingest.paperless import PaperlessClient
|
|
from payslip_ingest.processor import process_document
|
|
from payslip_ingest.schema import WebhookPayload
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Environment variables the service cannot run without. _verify_env() treats
# a variable as missing when it is unset or set to an empty string.
REQUIRED_ENV = [
    "PAPERLESS_URL",
    "PAPERLESS_API_TOKEN",
    "CLAUDE_AGENT_URL",
    "CLAUDE_AGENT_BEARER_TOKEN",
    "DB_CONNECTION_STRING",
    "WEBHOOK_BEARER_TOKEN",
]

# Type alias for the processor function — makes monkeypatching in tests explicit.
# The lifespan worker calls it as
# processor(doc_id, session_factory, paperless, extractor) and awaits the result.
ProcessorFn = Callable[
    [int, async_sessionmaker[Any], PaperlessClient, ClaudeExtractor],
    Awaitable[Any],
]
|
|
|
|
|
|
def _verify_env() -> None:
    """Fail fast when a required environment variable is unset or empty.

    Raises:
        RuntimeError: listing every name from ``REQUIRED_ENV`` that has no
            non-empty value in ``os.environ``.
    """
    absent: list[str] = []
    for name in REQUIRED_ENV:
        if os.environ.get(name):
            continue
        absent.append(name)

    if not absent:
        return

    joined = ", ".join(absent)
    raise RuntimeError(f"Missing required env vars: {joined}")
|
|
|
|
|
|
def _verify_bearer(authorization: str | None, expected: str) -> None:
|
|
if not expected:
|
|
raise HTTPException(status_code=401, detail="Service unauthenticated")
|
|
if not authorization or not authorization.startswith("Bearer "):
|
|
raise HTTPException(status_code=401, detail="Missing bearer token")
|
|
token = authorization.removeprefix("Bearer ")
|
|
if not hmac.compare_digest(token, expected):
|
|
raise HTTPException(status_code=401, detail="Invalid token")
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Build the shared clients, run the background worker, then tear down.

    Startup checks the required environment variables, creates the database
    engine and session factory, the Paperless and Claude clients, and a queue
    of document ids. It then starts one background task that passes each
    queued id to the processor function. The queue, session factory and both
    clients are published on ``app.state``.

    Shutdown cancels the worker, closes both clients and disposes the engine.
    Document ids still in the queue at that point are not processed.

    Setting ``app.state.processor_fn`` before startup replaces the default
    ``process_document`` (intended for tests).

    Raises:
        RuntimeError: from ``_verify_env`` when a required variable is missing.
    """
    _verify_env()

    # Starlette's State keeps attributes in a private mapping rather than in
    # the instance __dict__, so the override must be read with getattr();
    # looking in __dict__ always fell back to the default.
    processor: ProcessorFn = getattr(app.state, "processor_fn", process_document)

    # Cleanup callbacks run in reverse registration order, and every one runs
    # even when an earlier one (or a later constructor) raises.
    async with contextlib.AsyncExitStack() as cleanup:
        engine = create_engine_from_env()
        cleanup.push_async_callback(engine.dispose)
        session_factory = make_session_factory(engine)

        # The extractor is registered before the Paperless client so that
        # teardown keeps the order: worker, paperless, extractor, engine.
        extractor = ClaudeExtractor(
            base_url=os.environ["CLAUDE_AGENT_URL"],
            bearer_token=os.environ["CLAUDE_AGENT_BEARER_TOKEN"],
        )
        cleanup.push_async_callback(extractor.aclose)

        paperless = PaperlessClient(
            base_url=os.environ["PAPERLESS_URL"],
            api_token=os.environ["PAPERLESS_API_TOKEN"],
        )
        cleanup.push_async_callback(paperless.aclose)

        queue: asyncio.Queue[int] = asyncio.Queue()

        async def worker() -> None:
            # Process queued document ids one at a time, forever. A failure is
            # logged and the loop moves on, so one bad document cannot stop
            # the worker.
            while True:
                doc_id = await queue.get()
                try:
                    await processor(doc_id, session_factory, paperless, extractor)
                except Exception:
                    log.exception("processing failed for doc_id=%s", doc_id)
                finally:
                    queue.task_done()

        async def stop_worker() -> None:
            # Cancel the worker and wait for it to finish unwinding.
            worker_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await worker_task

        worker_task = asyncio.create_task(worker())
        cleanup.push_async_callback(stop_worker)

        app.state.queue = queue
        app.state.session_factory = session_factory
        app.state.paperless = paperless
        app.state.extractor = extractor

        yield
|
|
|
|
|
|
# The ASGI application; startup and shutdown are handled by lifespan().
app = FastAPI(title="Payslip Ingest", lifespan=lifespan)
# Instrument the app and expose the instrumentator's endpoint at /metrics.
Instrumentator().instrument(app).expose(app, endpoint="/metrics")
|
|
|
|
|
|
@app.post("/webhook", status_code=status.HTTP_202_ACCEPTED)
async def webhook(
    payload: WebhookPayload,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """Accept a webhook call and enqueue its document for processing.

    The bearer token in the ``Authorization`` header is checked against the
    ``WEBHOOK_BEARER_TOKEN`` environment variable. On success the document id
    is put on the queue stored in ``app.state`` and the request is answered
    with 202 straight away.

    Returns:
        ``{"status": "accepted", "document_id": <id>}``.

    Raises:
        HTTPException: 401 from ``_verify_bearer`` when authentication fails.
    """
    expected_token = os.environ.get("WEBHOOK_BEARER_TOKEN", "")
    _verify_bearer(authorization, expected_token)

    pending: asyncio.Queue[int] = app.state.queue
    document_id = payload.document_id
    await pending.put(document_id)

    response: dict[str, Any] = {"status": "accepted", "document_id": document_id}
    return response
|
|
|
|
|
|
@app.get("/healthz")
async def healthz() -> dict[str, Any]:
    """Report liveness and the number of document ids waiting in the queue.

    The depth is reported as 0 when ``app.state`` has no ``queue`` attribute
    (or it is ``None``).
    """
    pending: asyncio.Queue[int] | None = getattr(app.state, "queue", None)
    if pending is None:
        return {"status": "ok", "queue_depth": 0}
    return {"status": "ok", "queue_depth": pending.qsize()}
|