Initial commit: event-driven UK payslip ingest service
Extracted from /home/wizard/code monorepo into its own repo so Woodpecker CI can watch it. Identical content to /home/wizard/code commit e426028. See README.md for overview, env vars, and Paperless workflow config. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
57484619c1
27 changed files with 2878 additions and 0 deletions
127
tests/test_processor.py
Normal file
127
tests/test_processor.py
Normal file
|
|
@@ -0,0 +1,127 @@
|
|||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from payslip_ingest.processor import process_document
|
||||
from payslip_ingest.schema import ExtractedPayslip
|
||||
|
||||
|
||||
def _sample_extraction() -> ExtractedPayslip:
    """Return the canned payslip that the tests feed through the extractor mock.

    The figures are internally consistent: gross pay minus income tax, national
    insurance, employee pension, student loan and the single "other" deduction
    equals net pay (5000 - 800 - 350 - 250 - 100 - 50 = 3450).
    """
    period_start, period_end = date(2026, 3, 1), date(2026, 3, 31)
    extra_deductions = {"cycle_to_work": Decimal("50.00")}

    return ExtractedPayslip(
        employer="Acme Ltd",
        currency="GBP",
        pay_date=date(2026, 3, 28),
        pay_period_start=period_start,
        pay_period_end=period_end,
        gross_pay=Decimal("5000.00"),
        net_pay=Decimal("3450.00"),
        income_tax=Decimal("800.00"),
        national_insurance=Decimal("350.00"),
        student_loan=Decimal("100.00"),
        pension_employee=Decimal("250.00"),
        pension_employer=Decimal("150.00"),
        other_deductions=extra_deductions,
    )
|
||||
|
||||
|
||||
class _FakeSession:
|
||||
"""Minimal AsyncSession stand-in that records flushes and execute calls."""
|
||||
|
||||
def __init__(self, existing_ids: list[int]):
|
||||
self._existing_ids = existing_ids
|
||||
self.added: list[Any] = []
|
||||
self.begin_calls = 0
|
||||
|
||||
async def __aenter__(self) -> "_FakeSession":
|
||||
return self
|
||||
|
||||
async def __aexit__(self, *exc: object) -> None:
|
||||
return None
|
||||
|
||||
def begin(self) -> "_FakeSession":
|
||||
self.begin_calls += 1
|
||||
return self
|
||||
|
||||
async def execute(self, stmt: Any) -> Any:
|
||||
result = MagicMock()
|
||||
# scalar() returns None when we treat the row as missing.
|
||||
result.scalar.return_value = self._existing_ids.pop(0) if self._existing_ids else None
|
||||
return result
|
||||
|
||||
def add(self, row: Any) -> None:
|
||||
row.id = 1
|
||||
self.added.append(row)
|
||||
|
||||
async def flush(self) -> None:
|
||||
return None
|
||||
|
||||
|
||||
class _SessionFactory:
|
||||
|
||||
def __init__(self, sessions: list[_FakeSession]):
|
||||
self._sessions = list(sessions)
|
||||
self.used: list[_FakeSession] = []
|
||||
|
||||
def __call__(self) -> _FakeSession:
|
||||
session = self._sessions.pop(0)
|
||||
self.used.append(session)
|
||||
return session
|
||||
|
||||
|
||||
@pytest.fixture()
def paperless() -> AsyncMock:
    """Async mock of the Paperless client with canned responses.

    ``get_document`` resolves to a small metadata dict for document 42 and
    ``download_document`` resolves to placeholder bytes.
    """
    client = AsyncMock()
    # Dotted keys configure the return values of the child mocks in one call.
    client.configure_mock(
        **{
            "get_document.return_value": {"id": 42, "title": "Payslip"},
            "download_document.return_value": b"PDFDATA",
        }
    )
    return client
|
||||
|
||||
|
||||
@pytest.fixture()
def extractor() -> AsyncMock:
    """Async mock of the extractor whose ``extract`` resolves to the sample payslip."""
    fake_extractor = AsyncMock()
    fake_extractor.configure_mock(**{"extract.return_value": _sample_extraction()})
    return fake_extractor
|
||||
|
||||
|
||||
async def test_process_document_inserts_new(paperless: AsyncMock, extractor: AsyncMock) -> None:
    """A document with no existing row is fetched, extracted and inserted as validated.

    Both fake sessions answer ``execute`` with "no existing row"; the inserted
    row is looked up on the second session the factory handed out.
    """
    # NOTE(review): these coroutine tests carry no asyncio marker — presumably
    # pytest-asyncio runs in auto mode; confirm against the project's pytest config.
    doc_id = 42
    factory = _SessionFactory([_FakeSession(existing_ids=[]) for _ in range(2)])

    result = await process_document(doc_id, factory, paperless, extractor)

    assert result.status == "inserted"
    assert result.validated is True
    for fetch in (paperless.get_document, paperless.download_document):
        fetch.assert_awaited_once_with(doc_id)
    extractor.extract.assert_awaited_once()

    second_session = factory.used[1]
    inserted_row = second_session.added[0]
    assert inserted_row.paperless_doc_id == doc_id
    # The sample pay date of 2026-03-28 is expected to be labelled "2025/26".
    assert inserted_row.tax_year == "2025/26"
|
||||
|
||||
|
||||
async def test_process_document_skips_existing(paperless: AsyncMock, extractor: AsyncMock) -> None:
    """A document that already has a row is skipped without any fetch or extraction.

    The only session supplied answers its first ``execute`` with id 99.
    """
    already_stored = _FakeSession(existing_ids=[99])
    factory = _SessionFactory([already_stored])

    outcome = await process_document(42, factory, paperless, extractor)

    assert outcome.status == "skipped"
    for untouched in (paperless.get_document, extractor.extract):
        untouched.assert_not_called()
|
||||
|
||||
|
||||
async def test_process_document_flags_validation_failure(paperless: AsyncMock,
                                                         extractor: AsyncMock) -> None:
    """An extraction whose net pay does not add up is still inserted, but unvalidated.

    The sample payslip is re-validated with ``net_pay`` overridden to 9999.00,
    which no longer equals gross pay minus the sample's deductions (3450.00).
    """
    tampered = {**_sample_extraction().model_dump(), "net_pay": Decimal("9999.00")}
    extractor.extract.return_value = ExtractedPayslip.model_validate(tampered)

    factory = _SessionFactory([_FakeSession(existing_ids=[]) for _ in range(2)])

    result = await process_document(42, factory, paperless, extractor)

    assert result.status == "inserted"
    assert result.validated is False
    # The flag must also be set on the row added to the second session.
    stored_row = factory.used[1].added[0]
    assert stored_row.validated is False
|
||||
Loading…
Add table
Add a link
Reference in a new issue