UK payslips for equity-comp employees report RSU vests as notional pay for HMRC only. A paired same-magnitude deduction (Shares Retained / Stock Tax Withholding / RSU Offset) nets it back out of cash. The UK payslip's income_tax line shows tax on the grossed-up total, but the actual RSU tax is handled by Schwab (US broker) via share sale. No cash flows through UK payroll for RSU.

Previously the extractor folded RSU notional into gross_pay and income_tax, which inflated the dashboard numbers — a payslip with £25k RSU vest looked like 2x salary with 80% tax rate.

Changes:
- schema: add rsu_vest + rsu_offset fields (default 0).
- db + alembic 0002: add two new NUMERIC(12,2) columns with server default 0 (backward-compatible; existing rows get 0).
- validate_totals: include rsu_offset in deductions sum so the gross + rsu_vest inflation is properly netted out.
- extraction prompt: explicit rules for identifying RSU lines by the common Meta/Sage/Workday labels, and to NOT put them in other_deductions.

Dashboards in a follow-up commit: cash_gross = gross_pay - rsu_vest, effective tax rate based on cash metrics.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
166 lines
5.5 KiB
Python
166 lines
5.5 KiB
Python
from datetime import date
|
|
from decimal import Decimal
|
|
from typing import Any
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from payslip_ingest.processor import process_document
|
|
from payslip_ingest.schema import ExtractedPayslip
|
|
|
|
|
|
def _sample_extraction() -> ExtractedPayslip:
    """Build the baseline payslip extraction shared by the tests in this module.

    The figures describe a GBP payslip with no RSU activity (``rsu_vest`` and
    ``rsu_offset`` are both zero).  They are arithmetically consistent:
    5000 - 800 - 350 - 250 - 100 - 50 = 3450, which is the ``net_pay`` value.
    """
    # Monetary fields are listed as text and converted to Decimal in one place.
    amounts = {
        "gross_pay": "5000.00",
        "income_tax": "800.00",
        "national_insurance": "350.00",
        "pension_employee": "250.00",
        "pension_employer": "150.00",
        "student_loan": "100.00",
        "rsu_vest": "0.00",
        "rsu_offset": "0.00",
        "net_pay": "3450.00",
    }
    return ExtractedPayslip(
        pay_date=date(2026, 3, 28),
        pay_period_start=date(2026, 3, 1),
        pay_period_end=date(2026, 3, 31),
        employer="Acme Ltd",
        currency="GBP",
        other_deductions={"cycle_to_work": Decimal("50.00")},
        **{field: Decimal(text) for field, text in amounts.items()},
    )
|
|
|
|
|
|
class _FakeSession:
    """Minimal AsyncSession stand-in that records added rows and ``begin()`` calls.

    The object acts as its own async context manager, and ``begin()`` returns
    the session itself, so both ``async with session`` and
    ``async with session.begin()`` resolve to this instance.

    Args:
        existing_ids: Values handed out, in order, by successive ``execute()``
            calls via ``result.scalar()``.  Once the queue is exhausted,
            ``scalar()`` returns ``None``.
    """

    def __init__(self, existing_ids: list[int]):
        # Copy the queue: execute() consumes it, and the caller's list must
        # not be mutated as a side effect of running a test.
        self._existing_ids = list(existing_ids)
        self.added: list[Any] = []
        self.begin_calls = 0

    async def __aenter__(self) -> "_FakeSession":
        return self

    async def __aexit__(self, *exc: object) -> None:
        # Returning None means exceptions raised inside the block propagate.
        return None

    def begin(self) -> "_FakeSession":
        """Count the call and return this session as the transaction context."""
        self.begin_calls += 1
        return self

    async def execute(self, stmt: Any) -> Any:
        """Return a mock result whose ``scalar()`` yields the next queued id.

        ``stmt`` is accepted for signature compatibility and is not inspected.
        """
        result = MagicMock()
        # scalar() returns None when we treat the row as missing.
        result.scalar.return_value = self._existing_ids.pop(0) if self._existing_ids else None
        return result

    def add(self, row: Any) -> None:
        """Record ``row`` and assign it the fixed primary key ``1``."""
        row.id = 1
        self.added.append(row)

    async def flush(self) -> None:
        """No-op; nothing is persisted."""
        return None
|
|
|
|
|
|
class _SessionFactory:
    """Callable that hands out pre-built fake sessions in order.

    Each call removes the next session from the queue and appends it to
    ``used``, so a test can inspect sessions in the order they were requested.
    Calling it after the queue is empty raises ``IndexError`` from ``list.pop``.
    """

    def __init__(self, sessions: list[_FakeSession]):
        # Work on a private copy so the caller's list is left untouched.
        self._sessions = [*sessions]
        self.used: list[_FakeSession] = []

    def __call__(self) -> _FakeSession:
        self.used.append(self._sessions.pop(0))
        return self.used[-1]
|
|
|
|
|
|
@pytest.fixture()
def paperless() -> AsyncMock:
    """Provide an async mock client with canned document responses.

    ``get_document`` resolves to a document dict titled "Payslip" and
    ``download_document`` resolves to placeholder PDF bytes.
    """
    client = AsyncMock()
    client.configure_mock(**{
        "get_document.return_value": {"id": 42, "title": "Payslip"},
        "download_document.return_value": b"PDFDATA",
    })
    return client
|
|
|
|
|
|
@pytest.fixture()
def extractor() -> AsyncMock:
    """Provide an async mock whose ``extract`` resolves to the sample payslip."""
    stub = AsyncMock()
    stub.configure_mock(**{"extract.return_value": _sample_extraction()})
    return stub
|
|
|
|
|
|
async def test_process_document_inserts_new(paperless: AsyncMock, extractor: AsyncMock) -> None:
    """A document with no existing row is fetched, extracted and inserted.

    Both fake sessions report no existing id.  The inserted row is expected
    in the second session the factory hands out.
    """
    sessions = [_FakeSession(existing_ids=[]) for _ in range(2)]
    factory = _SessionFactory(sessions)

    outcome = await process_document(42, factory, paperless, extractor)

    # Outcome reported to the caller.
    assert outcome.status == "inserted"
    assert outcome.validated is True

    # Collaborators were each awaited exactly once for this document.
    paperless.get_document.assert_awaited_once_with(42)
    paperless.download_document.assert_awaited_once_with(42)
    extractor.extract.assert_awaited_once()

    # The persisted row carries the document id and the derived tax year.
    row = factory.used[1].added[0]
    assert row.paperless_doc_id == 42
    assert row.tax_year == "2025/26"
|
|
|
|
|
|
async def test_process_document_skips_existing(paperless: AsyncMock, extractor: AsyncMock) -> None:
    """When the lookup yields an existing id, nothing is fetched or extracted."""
    already_stored = _FakeSession(existing_ids=[99])
    factory = _SessionFactory([already_stored])

    outcome = await process_document(42, factory, paperless, extractor)

    assert outcome.status == "skipped"
    # Neither the document lookup nor the extractor should have been touched.
    paperless.get_document.assert_not_called()
    extractor.extract.assert_not_called()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "title",
    [
        "p60-meta-2025",
        "20001_Tax_254680_P60_2021_To_2022",
        "2024_Performance@_Year-end Letter_Viktor Barzin_1",
        "254680_Viktor_Barzin_18 Compensation_EMEA_20230311_2022 YE PSC",
        "2024-comp-letter",
        "RSU Grant Agreement 2024",
    ],
)
async def test_process_document_skips_non_payslip_by_title(
    paperless: AsyncMock,
    extractor: AsyncMock,
    title: str,
) -> None:
    """Documents with these non-payslip titles are skipped before download.

    The expected status is ``"skipped_non_payslip"``, with no download and no
    extraction attempted.
    """
    paperless.get_document.return_value = {"id": 42, "title": title}
    only_session = _FakeSession(existing_ids=[])
    factory = _SessionFactory([only_session])

    outcome = await process_document(42, factory, paperless, extractor)

    assert outcome.status == "skipped_non_payslip"
    paperless.download_document.assert_not_called()
    extractor.extract.assert_not_called()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "title",
    [
        "Payslip_2026-02-27",
        "20001_PY_254680_Jan_2022",
        "UKPY_254680_31_Jul_2019",
    ],
)
async def test_process_document_keeps_real_payslips(
    paperless: AsyncMock,
    extractor: AsyncMock,
    title: str,
) -> None:
    """Payslip-style titles are not filtered out: they are extracted and inserted."""
    paperless.get_document.return_value = {"id": 42, "title": title}
    sessions = [_FakeSession(existing_ids=[]) for _ in range(2)]
    factory = _SessionFactory(sessions)

    outcome = await process_document(42, factory, paperless, extractor)

    assert outcome.status == "inserted"
    extractor.extract.assert_awaited_once()
|
|
|
|
|
|
async def test_process_document_flags_validation_failure(
    paperless: AsyncMock,
    extractor: AsyncMock,
) -> None:
    """A payslip whose ``net_pay`` is overridden is inserted but flagged unvalidated.

    ``net_pay`` is replaced with 9999.00, which no longer matches the other
    sample figures.  The row is still stored, with ``validated`` set to False
    on both the result and the persisted row.
    """
    tampered = {**_sample_extraction().model_dump(), "net_pay": Decimal("9999.00")}
    extractor.extract.return_value = ExtractedPayslip.model_validate(tampered)

    sessions = [_FakeSession(existing_ids=[]) for _ in range(2)]
    factory = _SessionFactory(sessions)

    outcome = await process_document(42, factory, paperless, extractor)

    assert outcome.status == "inserted"
    assert outcome.validated is False
    stored_row = factory.used[1].added[0]
    assert stored_row.validated is False
|