Initial commit: event-driven UK payslip ingest service
Extracted from /home/wizard/code monorepo into its own repo so Woodpecker CI can watch it. Identical content to /home/wizard/code commit e426028. See README.md for overview, env vars, and Paperless workflow config. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
57484619c1
27 changed files with 2878 additions and 0 deletions
138
tests/test_extractor.py
Normal file
138
tests/test_extractor.py
Normal file
|
|
@@ -0,0 +1,138 @@
|
|||
import json
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import respx
|
||||
|
||||
from payslip_ingest import extractor as extractor_module
|
||||
from payslip_ingest.extractor import ClaudeExtractor, ExtractorError
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _tighten_retries(monkeypatch: pytest.MonkeyPatch) -> None:
    """Shrink the extractor's timing constants so each test finishes quickly.

    Runs automatically for every test in this module. The attributes are
    patched on the ``payslip_ingest.extractor`` module object, and
    ``monkeypatch`` restores the original values when the test ends.
    """
    # NOTE(review): going by the constant names, these disable the sleep
    # between polls, cap polling at one second, and remove the back-off after
    # a "busy" response -- confirm against the extractor module.
    monkeypatch.setattr(extractor_module, "POLL_INTERVAL_SECONDS", 0)
    monkeypatch.setattr(extractor_module, "MAX_POLL_SECONDS", 1)
    monkeypatch.setattr(extractor_module, "BUSY_RETRY_DELAY_SECONDS", 0)
|
||||
|
||||
|
||||
def _sample_extraction() -> dict[str, object]:
    """Return a fresh, internally consistent payslip payload for the tests.

    A new dict is built on every call, so a test may mutate its copy freely.
    The figures reconcile: ``gross_pay`` minus the employee-side deductions
    (income tax, national insurance, employee pension, student loan and the
    single ``other_deductions`` entry) equals ``net_pay``. The employer
    pension contribution is not part of that subtraction.
    """
    return {
        "pay_date": "2026-03-28",
        "pay_period_start": "2026-03-01",
        "pay_period_end": "2026-03-31",
        "employer": "Acme Ltd",
        "currency": "GBP",
        "gross_pay": 5000.0,
        "income_tax": 800.0,
        "national_insurance": 350.0,
        "pension_employee": 250.0,
        "pension_employer": 150.0,
        "student_loan": 100.0,
        "other_deductions": {
            "cycle_to_work": 50.0,
        },
        "net_pay": 3450.0,
    }
|
||||
|
||||
|
||||
def _agent_output(payload: dict[str, object]) -> list[str]:
    """Simulate claude CLI --output-format json stdout.

    Args:
        payload: The extraction result the fake agent should report.

    Returns:
        Three newline-terminated JSON lines, in order: a ``system``/``init``
        event, an ``assistant`` message whose single text block holds
        ``payload`` serialized as a JSON string, and a ``result`` event
        carrying that same JSON string.
    """
    # The payload is embedded as a JSON *string* inside each event, so it is
    # serialized once here and then encoded again as part of the event.
    encoded_payload = json.dumps(payload)
    events = [
        {"type": "system", "subtype": "init"},
        {
            "type": "assistant",
            "message": {
                "content": [{"type": "text", "text": encoded_payload}],
            },
        },
        {"type": "result", "result": encoded_payload},
    ]
    return [json.dumps(event) + "\n" for event in events]
|
||||
|
||||
|
||||
@pytest.fixture()
def client() -> ClaudeExtractor:
    """Provide a ``ClaudeExtractor`` aimed at the fake agent host.

    The base URL is the same one each test passes to
    ``respx.mock(base_url=...)``, so the extractor's requests match the
    mocked routes. The bearer token is a placeholder value.
    """
    return ClaudeExtractor(base_url="http://agent.test", bearer_token="tok")
|
||||
|
||||
|
||||
async def test_extract_happy_path(client: ClaudeExtractor) -> None:
    """An accepted job that completes yields the parsed payslip fields."""
    payload = _sample_extraction()
    with respx.mock(base_url="http://agent.test") as mock:
        # Submission is accepted and a job id is handed back.
        mock.post("/execute").mock(
            return_value=httpx.Response(
                202,
                json={"job_id": "abc123", "status": "running"},
            ),
        )
        # The first poll already reports completion with the agent output.
        mock.get("/jobs/abc123").mock(
            return_value=httpx.Response(
                200,
                json={"status": "completed", "output": _agent_output(payload)},
            ),
        )

        extracted = await client.extract(b"PDFDATA", {"id": 42})

        assert float(extracted.gross_pay) == 5000.0
        assert extracted.employer == "Acme Ltd"
|
||||
|
||||
|
||||
async def test_extract_retries_on_409(client: ClaudeExtractor) -> None:
    """A 409 "busy" reply to the submit call is retried until accepted."""
    payload = _sample_extraction()
    with respx.mock(base_url="http://agent.test") as mock:
        route = mock.post("/execute")
        # First submit is rejected as busy, the second one is accepted.
        route.side_effect = [
            httpx.Response(409, json={"detail": "busy"}),
            httpx.Response(202, json={"job_id": "abc123"}),
        ]
        mock.get("/jobs/abc123").mock(
            return_value=httpx.Response(
                200,
                json={"status": "completed", "output": _agent_output(payload)},
            ),
        )

        extracted = await client.extract(b"PDFDATA", {"id": 42})

        assert extracted.net_pay.is_finite()
        # Pin the actual value as well: is_finite() alone would pass for any
        # finite number, including a wrong one.
        assert float(extracted.net_pay) == 3450.0
        # Checked while the mock router is still active, so the call
        # statistics have not been reset.
        assert route.call_count == 2
|
||||
|
||||
|
||||
async def test_extract_polling_timeout_raises(client: ClaudeExtractor) -> None:
    """A job that never leaves ``running`` makes ``extract`` raise TimeoutError."""
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(
            return_value=httpx.Response(202, json={"job_id": "abc123"}),
        )
        # Every poll reports the job as still running, with no output.
        mock.get("/jobs/abc123").mock(
            return_value=httpx.Response(
                200,
                json={"status": "running", "output": []},
            ),
        )

        with pytest.raises(TimeoutError):
            await client.extract(b"PDFDATA", {"id": 42})
|
||||
|
||||
|
||||
async def test_extract_malformed_json_raises(client: ClaudeExtractor) -> None:
    """A completed job whose output lines are not JSON raises ExtractorError."""
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(
            return_value=httpx.Response(202, json={"job_id": "abc123"}),
        )
        # The job completes, but none of its output lines parse as JSON.
        mock.get("/jobs/abc123").mock(
            return_value=httpx.Response(
                200,
                json={
                    "status": "completed",
                    "output": ["this is not json\n", "still not json\n"],
                },
            ),
        )

        with pytest.raises(ExtractorError):
            await client.extract(b"PDFDATA", {"id": 42})
|
||||
|
||||
|
||||
async def test_extract_failed_status_raises(client: ClaudeExtractor) -> None:
    """A job that reports ``failed`` makes ``extract`` raise ExtractorError."""
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(
            return_value=httpx.Response(202, json={"job_id": "abc123"}),
        )
        # The agent reports failure: no output and a non-zero exit code.
        mock.get("/jobs/abc123").mock(
            return_value=httpx.Response(
                200,
                json={"status": "failed", "output": [], "exit_code": 1},
            ),
        )

        with pytest.raises(ExtractorError):
            await client.extract(b"PDFDATA", {"id": 42})
|
||||
Loading…
Add table
Add a link
Reference in a new issue