Initial commit: event-driven UK payslip ingest service

Extracted from /home/wizard/code monorepo into its own repo so Woodpecker CI
can watch it. Identical content to /home/wizard/code commit e426028.

See README.md for overview, env vars, and Paperless workflow config.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-18 22:10:23 +00:00
commit 57484619c1
27 changed files with 2878 additions and 0 deletions

111
tests/test_webhook.py Normal file
View file

@ -0,0 +1,111 @@
import asyncio
import contextlib
import os
import time
from collections.abc import AsyncIterator, Iterator
from contextlib import asynccontextmanager

import pytest
from fastapi import FastAPI, Header, HTTPException, status
from fastapi.testclient import TestClient

from payslip_ingest.app import _verify_bearer
from payslip_ingest.schema import WebhookPayload
def _build_app() -> tuple[FastAPI, list[int]]:
    """Assemble a small FastAPI app that imitates the real /webhook endpoint.

    The route is re-declared here rather than imported so the tests do not
    boot the SQLAlchemy / httpx clients that the real `lifespan` constructs
    on startup.

    Returns:
        The app, plus the list to which the background worker appends each
        document id it takes off the queue.
    """
    processed: list[int] = []

    @asynccontextmanager
    async def lifespan(app: FastAPI) -> AsyncIterator[None]:
        pending: asyncio.Queue[int] = asyncio.Queue()
        app.state.queue = pending

        async def worker() -> None:
            # Runs until cancelled by the lifespan teardown below.
            while True:
                processed.append(await pending.get())
                pending.task_done()

        consumer = asyncio.create_task(worker())
        try:
            yield
        finally:
            # Stop the consumer and swallow the cancellation it raises.
            consumer.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await consumer

    app = FastAPI(lifespan=lifespan)

    @app.post("/webhook", status_code=status.HTTP_202_ACCEPTED)
    async def webhook(
        payload: WebhookPayload,
        authorization: str | None = Header(default=None),
    ) -> dict[str, object]:
        expected_token = os.environ.get("WEBHOOK_BEARER_TOKEN", "")
        _verify_bearer(authorization, expected_token)
        document_id = payload.document_id
        await app.state.queue.put(document_id)
        return {"status": "accepted", "document_id": document_id}

    return app, processed
@pytest.fixture()
def client() -> Iterator[TestClient]:
    """Yield a TestClient for the mirrored app, entered as a context manager.

    The list of ids recorded by the worker is attached to ``app.state.seen``
    so tests can reach it through ``client.app``.
    """
    app, seen_ids = _build_app()
    app.state.seen = seen_ids
    with TestClient(app) as test_client:
        yield test_client
def test_webhook_rejects_missing_auth(client: TestClient) -> None:
    """A request sent without an Authorization header must get a 401."""
    response = client.post("/webhook", json={"document_id": 42})
    assert response.status_code == status.HTTP_401_UNAUTHORIZED
def test_webhook_rejects_wrong_bearer(client: TestClient) -> None:
    """A request carrying the bearer token ``wrong`` must get a 401."""
    body = {"document_id": 42}
    bad_headers = {"Authorization": "Bearer wrong"}
    response = client.post("/webhook", json=body, headers=bad_headers)
    assert response.status_code == status.HTTP_401_UNAUTHORIZED
def test_webhook_accepts_valid_request(client: TestClient) -> None:
    """An authenticated, well-formed request is accepted and then processed.

    Checks the 202 response and its JSON body, then waits (bounded) for the
    background worker to record the document id in ``app.state.seen``.
    """
    resp = client.post(
        "/webhook",
        json={"document_id": 42},
        headers={"Authorization": f"Bearer {os.environ['WEBHOOK_BEARER_TOKEN']}"},
    )
    assert resp.status_code == 202
    assert resp.json() == {"status": "accepted", "document_id": 42}

    seen: list[int] = client.app.state.seen  # type: ignore[attr-defined]
    # The queue is owned by the event loop that serves the app, and asyncio
    # queues are not thread-safe, so awaiting `queue.join()` on a separate
    # loop from this test would be unreliable. Poll the worker's output list
    # with a deadline instead; the final assert reports a timeout as failure.
    deadline = time.monotonic() + 2.0
    while 42 not in seen and time.monotonic() < deadline:
        time.sleep(0.01)
    assert 42 in seen
def test_webhook_rejects_malformed_body(client: TestClient) -> None:
    """A non-integer ``document_id`` must get a 422 even with valid auth."""
    auth_headers = {
        "Authorization": f"Bearer {os.environ['WEBHOOK_BEARER_TOKEN']}",
    }
    bad_body = {"document_id": "not-an-int"}
    response = client.post("/webhook", json=bad_body, headers=auth_headers)
    assert response.status_code == 422
def test_verify_bearer_rejects_unconfigured_service() -> None:
    """With an empty expected token, `_verify_bearer` must raise HTTPException."""
    unconfigured_token = ""
    with pytest.raises(HTTPException):
        _verify_bearer("Bearer anything", unconfigured_token)