parser + P60 ingest: split income_tax cash/RSU, add P60 ground-truth
Meta variant-B payslips gross up Taxable Pay for RSU and compute PAYE on the grossed-up figure, so `income_tax` on the slip is the total PAYE (cash + RSU-attributed). Dashboards that stacked the raw figure made vest-month tax look ~2x higher than "cash tax paid".

Introduce `cash_income_tax = income_tax * (gross_pay - pension_sacrifice) / taxable_pay` as a derived column alongside the raw figure. Dashboards can now stack cash vs RSU-attributed tax as separate segments.

Also capture YTD column values of `RSU Tax Offset` and `RSU Excs Refund` from the Payments grid — needed for reconciliation against HMRC annual figures.

P60 ingest: new parser under `parsers/p60.py` anchoring on statutory HMRC line labels (`Tax year to 5 April YYYY`, `Employer PAYE reference`, `In this employment` pay/tax row, NI letter bands). Processor routes documents carrying the `p60` Paperless tag to `_handle_p60`, which writes to the new `payslip_ingest.p60_reference` table (one row per tax_year+employer). App lifespan resolves the tag id at startup; missing tag disables dispatch without breaking payslip ingest. Paperless tag creation + webhook config are manual follow-ups.

Migrations:
- 0004 — cash_income_tax + ytd_rsu_tax_offset + ytd_rsu_excs_refund on payslip, all nullable.
- 0005 — p60_reference table with (tax_year, employer) unique + paperless_doc_id unique for idempotent re-uploads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d91f34ddb4
commit
26e43b1055
14 changed files with 644 additions and 15 deletions
51
tests/fixtures/meta_uk_p60_2024_25.txt
vendored
Normal file
51
tests/fixtures/meta_uk_p60_2024_25.txt
vendored
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
P60 End of Year Certificate
|
||||
|
||||
Tax year to 5 April 2025
|
||||
|
||||
Employee's details
|
||||
|
||||
Surname BARZIN
|
||||
First two forenames VIKTOR
|
||||
National Insurance number AA 12 34 56 A
|
||||
Works/Payroll number 254680
|
||||
|
||||
Pay and Income Tax details
|
||||
|
||||
Pay Tax deducted
|
||||
|
||||
In previous employment(s) £0.00 £0.00
|
||||
In this employment £232,630.34 £95,820.11
|
||||
|
||||
Total for year £232,630.34 £95,820.11
|
||||
|
||||
Final tax code 1257L
|
||||
|
||||
National Insurance contributions in this employment
|
||||
|
||||
NI table letter Earnings at Earnings above Total of employee's
|
||||
LEL LEL up to UEL contributions in
|
||||
this employment
|
||||
A £6,396.00 £47,268.00 £5,172.40
|
||||
|
||||
Statutory payments included in the pay 'In this employment' figure
|
||||
|
||||
Statutory Maternity Pay £0.00
|
||||
Statutory Paternity Pay £0.00
|
||||
|
||||
Student Loan deductions in this employment £0.00
|
||||
|
||||
Other details
|
||||
|
||||
Your employer's full name and address
|
||||
|
||||
Facebook UK Limited
|
||||
10 Brock Street
|
||||
London
|
||||
NW1 3FG
|
||||
|
||||
Employer PAYE reference 120/FA12345
|
||||
|
||||
This form shows your total pay for Income Tax purposes in this employment
|
||||
for the year. Any overtime, bonus, commission etc, Statutory Sick Pay,
|
||||
Statutory Maternity Pay, Statutory Paternity Pay or Shared Parental Pay,
|
||||
Statutory Parental Bereavement Pay is included.
|
||||
|
|
@ -38,6 +38,15 @@ def test_parses_variant_b_modern() -> None:
|
|||
assert result.ytd_taxable_pay == Decimal("373601.64")
|
||||
assert result.ytd_gross == Decimal("232630.34")
|
||||
|
||||
# Derived cash-only PAYE: income_tax * (gross - pension_sacrifice) / taxable_pay
|
||||
# = 31311.90 * 39282.69 / 72096.92 = 17060.59 (vs 31311.90 total PAYE)
|
||||
assert result.cash_income_tax is not None
|
||||
assert abs(result.cash_income_tax - Decimal("17060.59")) <= Decimal("0.02")
|
||||
|
||||
# YTD column of RSU lines in the Payments grid
|
||||
assert result.ytd_rsu_tax_offset == Decimal("124674.27")
|
||||
assert result.ytd_rsu_excs_refund == Decimal("3221.32")
|
||||
|
||||
|
||||
def test_parses_variant_b_with_bonus() -> None:
|
||||
"""March 2025 — variant B, bonus + RSU + multiple other deductions."""
|
||||
|
|
@ -145,6 +154,28 @@ def test_parses_variant_a_2021_08() -> None:
|
|||
assert result.taxable_pay == Decimal("15323.16")
|
||||
|
||||
|
||||
def test_cash_income_tax_falls_back_when_taxable_pay_missing() -> None:
    """Without a usable taxable_pay, cash_income_tax equals income_tax.

    Both a missing (None) and a zero taxable_pay must return the raw
    income_tax figure unchanged (no RSU grossing).
    """
    from payslip_ingest.parsers.meta_uk import _cash_income_tax

    income_tax = Decimal("1000")
    for taxable_pay in (None, Decimal("0")):
        cash_tax = _cash_income_tax(income_tax, Decimal("5000"), Decimal("100"), taxable_pay)
        assert cash_tax == Decimal("1000")
|
||||
|
||||
|
||||
def test_variant_a_cash_income_tax_pro_rata() -> None:
    """A variant A payslip that carries taxable_pay gets a pro-rata cash_income_tax.

    The 2021-06 fixture has taxable_pay=5095.86 (= gross_pay),
    pension_sacrifice=152.90 and income_tax=1410.07, so the expected value is
    1410.07 * 4942.96 / 5095.86 = 1367.76.
    """
    expected_cash_tax = Decimal("1367.76")
    tolerance = Decimal("0.02")

    payslip = parse_meta_uk(_load("meta_uk_2021_06_variant_a_bik.txt"))

    assert payslip.taxable_pay == Decimal("5095.86")
    assert payslip.cash_income_tax is not None
    # Compare within a small tolerance rather than exactly.
    assert abs(payslip.cash_income_tax - expected_cash_tax) <= tolerance
|
||||
|
||||
|
||||
def test_raises_on_non_meta_payslip() -> None:
|
||||
with pytest.raises(ParserError):
|
||||
parse_meta_uk("This is not a Meta payslip\nRandom text\n")
|
||||
|
|
|
|||
74
tests/test_p60_parser.py
Normal file
74
tests/test_p60_parser.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from payslip_ingest.parsers.p60 import P60ParserError, parse_p60
|
||||
|
||||
FIXTURES = Path(__file__).parent / "fixtures"
|
||||
|
||||
|
||||
def _load(name: str) -> str:
    """Return the UTF-8 decoded text of the fixture file called *name*."""
    fixture_path = FIXTURES / name
    return fixture_path.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_parses_meta_uk_p60_2024_25() -> None:
    """Every extracted field of the 2024/25 P60 fixture matches the document."""
    p60 = parse_p60(_load("meta_uk_p60_2024_25.txt"))

    # Checked in insertion order, so the first mismatch reported is the same
    # one a flat list of asserts would hit.
    expected_fields = {
        "tax_year": "2024/25",
        "employer": "Facebook UK Limited",
        "employer_paye_ref": "120/FA12345",
        "gross_pay": Decimal("232630.34"),
        "income_tax": Decimal("95820.11"),
        "national_insurance": Decimal("5172.40"),
        "student_loan": Decimal("0.00"),
        "tax_code": "1257L",
    }
    for field_name, expected_value in expected_fields.items():
        assert getattr(p60, field_name) == expected_value
|
||||
|
||||
|
||||
def test_parse_p60_raises_on_non_p60_text() -> None:
    """Payslip-like text is rejected with a 'does not look like a P60' error."""
    payslip_text = "Payslip for March 2025\nGross: £1000\n"
    with pytest.raises(P60ParserError, match="does not look like a P60"):
        parse_p60(payslip_text)
|
||||
|
||||
|
||||
def test_parse_p60_raises_on_empty_text() -> None:
    """An empty document raises P60ParserError."""
    empty_text = ""
    with pytest.raises(P60ParserError):
        parse_p60(empty_text)
|
||||
|
||||
|
||||
def test_parse_p60_raises_without_tax_year_anchor() -> None:
    """Text mentioning P60 but lacking the tax-year anchor raises an error naming 'Tax year'."""
    text_without_anchor = "P60\nSome other content without the required anchor\n"
    with pytest.raises(P60ParserError, match="Tax year"):
        parse_p60(text_without_anchor)
|
||||
|
||||
|
||||
def test_parse_p60_handles_old_facebook_uk_ltd_spelling() -> None:
    """Pre-2022 P60s list the employer as `Facebook UK Ltd` (no `Limited`)."""
    modern_text = _load("meta_uk_p60_2024_25.txt")
    # Rewrite the employer name in the fixture to the older short form.
    legacy_text = modern_text.replace("Facebook UK Limited", "Facebook UK Ltd")

    p60 = parse_p60(legacy_text)

    assert p60.employer == "Facebook UK Ltd"
|
||||
|
||||
|
||||
def test_parse_p60_student_loan_missing_is_none() -> None:
    """P60s for years without student-loan deductions omit that line entirely."""
    original_text = _load("meta_uk_p60_2024_25.txt")

    # Drop every line mentioning Student Loan (simulating a year pre-loan).
    kept_lines = [
        line for line in original_text.splitlines() if "Student Loan" not in line
    ]
    p60 = parse_p60("\n".join(kept_lines))

    assert p60.student_loan is None
|
||||
|
||||
|
||||
def test_parse_p60_tax_code_missing_is_none() -> None:
    """Some historical P60s may not print a `Final tax code` line."""
    fixture_text = _load("meta_uk_p60_2024_25.txt")
    # Overwrite the label so the parser has no `Final tax code` text to find.
    text_without_label = fixture_text.replace("Final tax code", "XXX")

    p60 = parse_p60(text_without_label)

    assert p60.tax_code is None
|
||||
|
||||
|
||||
def test_parse_p60_sums_ni_across_letter_bands() -> None:
    """Employees who cross NI letter bands mid-year get one row per letter."""
    base_text = _load("meta_uk_p60_2024_25.txt")
    # A second NI letter row with the same shape as the fixture's A row.
    letter_c_row = "C £6,396.00 £47,268.00 £1,000.00\n"

    p60 = parse_p60(f"{base_text}\n{letter_c_row}")

    # 5172.40 (letter A, in fixture) + 1000.00 (letter C, appended)
    assert p60.national_insurance == Decimal("6172.40")
|
||||
|
|
@ -246,3 +246,45 @@ async def test_rejects_zero_gross_zero_net(paperless: AsyncMock, extractor: Asyn
|
|||
factory = _SessionFactory([_FakeSession(existing_ids=[])])
|
||||
with pytest.raises(ValueError, match="zero gross and net"):
|
||||
await process_document(42, factory, paperless, extractor)
|
||||
|
||||
|
||||
async def test_p60_tag_routes_to_p60_handler(
    paperless: AsyncMock,
    extractor: AsyncMock,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A doc carrying the P60 tag id goes to _handle_p60 (not the payslip path)."""
    fixture_path = FIXTURES / "meta_uk_p60_2024_25.txt"
    p60_text = fixture_path.read_text(encoding="utf-8")
    # Stub the PDF-to-text step so the fixture text is returned instead.
    monkeypatch.setattr(processor, "_pdftotext", lambda _: p60_text)
    paperless.get_document.return_value = {"id": 42, "title": "P60 2024-25", "tags": [7]}

    # Two sessions: one for combined dedup, one for the P60 insert.
    sessions = [_FakeSession(existing_ids=[]) for _ in range(2)]
    factory = _SessionFactory(sessions)

    outcome = await process_document(42, factory, paperless, extractor, p60_tag_id=7)

    assert outcome.status == "inserted"
    assert outcome.extractor == "p60_regex"
    assert outcome.p60_id == 1
    # Extractor (Claude) must not be called for a P60.
    extractor.extract.assert_not_called()

    # The row added through the second session used by the factory.
    p60_row = factory.used[1].added[0]
    assert p60_row.tax_year == "2024/25"
    assert p60_row.gross_pay == Decimal("232630.34")
    assert p60_row.income_tax == Decimal("95820.11")
|
||||
|
||||
|
||||
async def test_p60_tag_absent_follows_payslip_path(
    paperless: AsyncMock,
    extractor: AsyncMock,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A regular payslip (no P60 tag) still goes through the payslip path."""
    fixture_path = FIXTURES / "meta_uk_2026_02.txt"
    payslip_text = fixture_path.read_text(encoding="utf-8")
    # Stub the PDF-to-text step so the fixture text is returned instead.
    monkeypatch.setattr(processor, "_pdftotext", lambda _: payslip_text)
    # The document's tag list does not contain the P60 tag id passed below (7).
    paperless.get_document.return_value = {"id": 42, "title": "Payslip", "tags": [3]}

    sessions = [_FakeSession(existing_ids=[]) for _ in range(2)]
    factory = _SessionFactory(sessions)

    outcome = await process_document(42, factory, paperless, extractor, p60_tag_id=7)

    assert outcome.status == "inserted"
    assert outcome.extractor == "meta_uk_regex"
    assert outcome.p60_id is None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue