Extracted from /home/wizard/code monorepo into its own repo so Woodpecker CI can watch it. Identical content to /home/wizard/code commit e426028. See README.md for overview, env vars, and Paperless workflow config. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
138 lines
4.9 KiB
Python
138 lines
4.9 KiB
Python
import json
|
|
|
|
import httpx
|
|
import pytest
|
|
import respx
|
|
|
|
from payslip_ingest import extractor as extractor_module
|
|
from payslip_ingest.extractor import ClaudeExtractor, ExtractorError
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _tighten_retries(monkeypatch: pytest.MonkeyPatch) -> None:
    """Override the extractor module's timing constants for every test.

    ``autouse=True`` applies this to each test in the module without the test
    requesting it. The values are installed through ``monkeypatch``, so pytest
    restores the originals when the test finishes.

    NOTE(review): judging by their names, the two zeroed constants are sleep
    intervals and ``MAX_POLL_SECONDS`` is the overall polling budget; confirm
    against ``payslip_ingest.extractor``.
    """
    overrides = {
        "POLL_INTERVAL_SECONDS": 0,
        "MAX_POLL_SECONDS": 1,
        "BUSY_RETRY_DELAY_SECONDS": 0,
    }
    for constant_name, value in overrides.items():
        monkeypatch.setattr(extractor_module, constant_name, value)
|
|
|
|
|
|
def _sample_extraction() -> dict[str, object]:
    """Return a fresh sample payslip payload for the mocked agent to emit.

    A new dict is built on every call, so a test may mutate its copy freely.
    The figures reconcile: gross pay minus the employee-side deductions
    (income tax, national insurance, employee pension, student loan and the
    ``cycle_to_work`` entry) equals net pay, i.e.
    5000 - 800 - 350 - 250 - 100 - 50 = 3450.
    """
    return {
        "pay_date": "2026-03-28",
        "pay_period_start": "2026-03-01",
        "pay_period_end": "2026-03-31",
        "employer": "Acme Ltd",
        "currency": "GBP",
        "gross_pay": 5000.0,
        "income_tax": 800.0,
        "national_insurance": 350.0,
        "pension_employee": 250.0,
        "pension_employer": 150.0,
        "student_loan": 100.0,
        "other_deductions": {"cycle_to_work": 50.0},
        "net_pay": 3450.0,
    }
|
|
|
|
|
|
def _agent_output(payload: dict[str, object]) -> list[str]:
    """Simulate claude CLI --output-format json stdout.

    Args:
        payload: The extraction result the fake agent should report.

    Returns:
        Three newline-terminated JSON documents, in order: a ``system``/``init``
        event, an ``assistant`` event whose single text content item holds the
        JSON-encoded payload, and a ``result`` event carrying the same encoded
        payload.

    NOTE(review): one JSON event per line looks like the CLI's streaming
    output rather than a single JSON document; confirm which output format the
    agent service actually runs with.
    """
    # The payload is embedded as a JSON *string* inside each event, so encode
    # it once and reuse the text for both the assistant and the result event.
    encoded_payload = json.dumps(payload)
    events = [
        {"type": "system", "subtype": "init"},
        {
            "type": "assistant",
            "message": {"content": [{"type": "text", "text": encoded_payload}]},
        },
        {"type": "result", "result": encoded_payload},
    ]
    return [json.dumps(event) + "\n" for event in events]
|
|
|
|
|
|
@pytest.fixture()
def client() -> ClaudeExtractor:
    """Provide a ``ClaudeExtractor`` pointed at the fake agent host.

    The base URL matches the ``http://agent.test`` host the tests mock with
    respx, and the bearer token is a dummy value.
    """
    return ClaudeExtractor(base_url="http://agent.test", bearer_token="tok")
|
|
|
|
|
|
async def test_extract_happy_path(client: ClaudeExtractor) -> None:
    """A submitted job that completes yields the parsed payslip fields.

    The mocked agent answers ``POST /execute`` with 202 and job id ``abc123``;
    polling ``GET /jobs/abc123`` immediately reports ``completed`` with the
    sample payload as agent output.
    """
    payload = _sample_extraction()
    accepted = httpx.Response(202, json={"job_id": "abc123", "status": "running"})
    completed = httpx.Response(
        200,
        json={"status": "completed", "output": _agent_output(payload)},
    )
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(return_value=accepted)
        mock.get("/jobs/abc123").mock(return_value=completed)
        extracted = await client.extract(b"PDFDATA", {"id": 42})
        # Checked while the mock router is still active.
        assert float(extracted.gross_pay) == 5000.0
        assert extracted.employer == "Acme Ltd"
|
|
|
|
|
|
async def test_extract_retries_on_409(client: ClaudeExtractor) -> None:
    """A 409 from ``POST /execute`` is retried and the second attempt succeeds."""
    payload = _sample_extraction()
    completed = httpx.Response(
        200,
        json={"status": "completed", "output": _agent_output(payload)},
    )
    with respx.mock(base_url="http://agent.test") as mock:
        route = mock.post("/execute")
        # Responses are consumed in order: first "busy", then accepted.
        route.side_effect = [
            httpx.Response(409, json={"detail": "busy"}),
            httpx.Response(202, json={"job_id": "abc123"}),
        ]
        mock.get("/jobs/abc123").mock(return_value=completed)
        extracted = await client.extract(b"PDFDATA", {"id": 42})
        assert extracted.net_pay.is_finite()
        # Keep this inside the ``with`` block: respx resets route call
        # statistics when the mock context exits.
        assert route.call_count == 2
|
|
|
|
|
|
async def test_extract_polling_timeout_raises(client: ClaudeExtractor) -> None:
    """A job that never leaves ``running`` makes ``extract`` raise ``TimeoutError``."""
    accepted = httpx.Response(202, json={"job_id": "abc123"})
    # Every poll returns this same "still running, no output yet" answer.
    still_running = httpx.Response(200, json={"status": "running", "output": []})
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(return_value=accepted)
        mock.get("/jobs/abc123").mock(return_value=still_running)
        with pytest.raises(TimeoutError):
            await client.extract(b"PDFDATA", {"id": 42})
|
|
|
|
|
|
async def test_extract_malformed_json_raises(client: ClaudeExtractor) -> None:
    """A completed job whose output lines are not JSON raises ``ExtractorError``."""
    accepted = httpx.Response(202, json={"job_id": "abc123"})
    garbage = httpx.Response(
        200,
        json={
            "status": "completed",
            "output": ["this is not json\n", "still not json\n"],
        },
    )
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(return_value=accepted)
        mock.get("/jobs/abc123").mock(return_value=garbage)
        with pytest.raises(ExtractorError):
            await client.extract(b"PDFDATA", {"id": 42})
|
|
|
|
|
|
async def test_extract_failed_status_raises(client: ClaudeExtractor) -> None:
    """A job reported as ``failed`` raises ``ExtractorError``."""
    accepted = httpx.Response(202, json={"job_id": "abc123"})
    # The job ends with no output and a non-zero exit code.
    failed = httpx.Response(
        200,
        json={"status": "failed", "output": [], "exit_code": 1},
    )
    with respx.mock(base_url="http://agent.test") as mock:
        mock.post("/execute").mock(return_value=accepted)
        mock.get("/jobs/abc123").mock(return_value=failed)
        with pytest.raises(ExtractorError):
            await client.extract(b"PDFDATA", {"id": 42})
|