"""Tests for ``ClaudeExtractor`` run against a respx-mocked agent HTTP API."""
import json

import httpx
import pytest
import respx

from payslip_ingest import extractor as extractor_module
from payslip_ingest.extractor import ClaudeExtractor, ExtractorError

# NOTE(review): the async tests below carry no explicit asyncio marker, so they
# presumably rely on an async pytest plugin configured project-wide - confirm.


@pytest.fixture(autouse=True)
def _tighten_retries(monkeypatch: pytest.MonkeyPatch) -> None:
    """Patch the extractor module's timing constants to minimal values.

    Applied automatically to every test in this module.
    """
    overrides = {
        "POLL_INTERVAL_SECONDS": 0,
        "MAX_POLL_SECONDS": 1,
        "BUSY_RETRY_DELAY_SECONDS": 0,
    }
    for constant_name, value in overrides.items():
        monkeypatch.setattr(extractor_module, constant_name, value)


def _sample_extraction() -> dict[str, object]:
    """Return a fresh payslip extraction payload used as the agent's answer."""
    extraction: dict[str, object] = {
        "pay_date": "2026-03-28",
        "pay_period_start": "2026-03-01",
        "pay_period_end": "2026-03-31",
        "employer": "Acme Ltd",
        "currency": "GBP",
        "gross_pay": 5000.0,
        "income_tax": 800.0,
        "national_insurance": 350.0,
        "pension_employee": 250.0,
        "pension_employer": 150.0,
        "student_loan": 100.0,
        "other_deductions": {"cycle_to_work": 50.0},
        "net_pay": 3450.0,
    }
    return extraction


def _agent_output(payload: dict[str, object]) -> list[str]:
    """Simulate claude CLI --output-format json stdout.

    Produces three newline-terminated JSON lines: a system init event, an
    assistant message whose text is the JSON-encoded ``payload``, and a
    result event carrying the same encoded ``payload``.
    """
    encoded_payload = json.dumps(payload)
    events = [
        {"type": "system", "subtype": "init"},
        {
            "type": "assistant",
            "message": {
                "content": [{"type": "text", "text": encoded_payload}],
            },
        },
        {"type": "result", "result": encoded_payload},
    ]
    return [json.dumps(event) + "\n" for event in events]


@pytest.fixture()
def client() -> ClaudeExtractor:
    """Provide an extractor pointed at the mocked agent base URL."""
    return ClaudeExtractor(base_url="http://agent.test", bearer_token="tok")


async def test_extract_happy_path(client: ClaudeExtractor) -> None:
    """An accepted job that completes yields the extracted payslip fields."""
    expected = _sample_extraction()
    accepted = httpx.Response(202, json={"job_id": "abc123", "status": "running"})
    finished = httpx.Response(
        200,
        json={"status": "completed", "output": _agent_output(expected)},
    )

    with respx.mock(base_url="http://agent.test") as router:
        router.post("/execute").mock(return_value=accepted)
        router.get("/jobs/abc123").mock(return_value=finished)

        result = await client.extract(b"PDFDATA", {"id": 42})

        assert float(result.gross_pay) == 5000.0
        assert result.employer == "Acme Ltd"


async def test_extract_retries_on_409(client: ClaudeExtractor) -> None:
    """A 409 from ``/execute`` is followed by a second, successful submit."""
    expected = _sample_extraction()
    submit_responses = [
        httpx.Response(409, json={"detail": "busy"}),
        httpx.Response(202, json={"job_id": "abc123"}),
    ]
    finished = httpx.Response(
        200,
        json={"status": "completed", "output": _agent_output(expected)},
    )

    with respx.mock(base_url="http://agent.test") as router:
        route = router.post("/execute")
        route.side_effect = submit_responses
        router.get("/jobs/abc123").mock(return_value=finished)

        result = await client.extract(b"PDFDATA", {"id": 42})

        # Checked inside the mock context: leaving it resets call statistics.
        assert result.net_pay.is_finite()
        assert route.call_count == 2


async def test_extract_polling_timeout_raises(client: ClaudeExtractor) -> None:
    """A job that stays in ``running`` status ends in ``TimeoutError``."""
    accepted = httpx.Response(202, json={"job_id": "abc123"})
    still_running = httpx.Response(200, json={"status": "running", "output": []})

    with respx.mock(base_url="http://agent.test") as router:
        router.post("/execute").mock(return_value=accepted)
        router.get("/jobs/abc123").mock(return_value=still_running)

        with pytest.raises(TimeoutError):
            await client.extract(b"PDFDATA", {"id": 42})


async def test_extract_malformed_json_raises(client: ClaudeExtractor) -> None:
    """A completed job whose output lines are not JSON raises ``ExtractorError``."""
    accepted = httpx.Response(202, json={"job_id": "abc123"})
    garbage = httpx.Response(
        200,
        json={
            "status": "completed",
            "output": ["this is not json\n", "still not json\n"],
        },
    )

    with respx.mock(base_url="http://agent.test") as router:
        router.post("/execute").mock(return_value=accepted)
        router.get("/jobs/abc123").mock(return_value=garbage)

        with pytest.raises(ExtractorError):
            await client.extract(b"PDFDATA", {"id": 42})


async def test_extract_failed_status_raises(client: ClaudeExtractor) -> None:
    """A job reported with ``failed`` status raises ``ExtractorError``."""
    accepted = httpx.Response(202, json={"job_id": "abc123"})
    failed = httpx.Response(
        200,
        json={"status": "failed", "output": [], "exit_code": 1},
    )

    with respx.mock(base_url="http://agent.test") as router:
        router.post("/execute").mock(return_value=accepted)
        router.get("/jobs/abc123").mock(return_value=failed)

        with pytest.raises(ExtractorError):
            await client.extract(b"PDFDATA", {"id": 42})