payslip-ingest/payslip_ingest/schema.py

from datetime import date
from decimal import Decimal

from pydantic import BaseModel, ConfigDict, Field

# Largest absolute gap between (gross - deductions) and net pay that
# validate_totals tolerates; 0.02 is 2p when the payslip is in GBP.
TOTALS_TOLERANCE = Decimal("0.02")


class ExtractedPayslip(BaseModel):
    # One payslip as produced by the extractor. Keys that are not declared
    # below fail validation (extra="forbid"). The overview is kept in comments
    # rather than a docstring so the model's JSON-schema description does not
    # change.
    model_config = ConfigDict(extra="forbid")

    # --- Dates, employer, currency -------------------------------------
    pay_date: date
    pay_period_start: date | None = None
    pay_period_end: date | None = None
    employer: str | None = None
    currency: str = "GBP"

    # --- Headline gross and the standard deductions --------------------
    # Every deduction is optional and counts as zero when absent.
    gross_pay: Decimal
    income_tax: Decimal = Decimal("0")
    national_insurance: Decimal = Decimal("0")
    pension_employee: Decimal = Decimal("0")
    pension_employer: Decimal = Decimal("0")
    student_loan: Decimal = Decimal("0")

    # --- RSU handling --------------------------------------------------
    # The RSU vest shown on a UK payslip is a notional figure: Schwab handles
    # the share grant and withholds US-side tax by selling shares, so no cash
    # moves through UK payroll and the line exists only for HMRC reporting.
    # It lives in its own field so dashboards can derive
    # cash-only gross = gross_pay - rsu_vest.
    rsu_vest: Decimal = Decimal("0")
    # Matching offset deduction that takes the RSU back out of cash pay on
    # the UK slip; equal in magnitude to rsu_vest. Labels vary: "Shares
    # Retained", "Stock Tax Withholding", "RSU Offset", "Notional Pay Offset".
    # Meta's template has no such line — there rsu_vest grosses up Taxable Pay
    # directly and PAYE is computed on the grossed-up figure.
    rsu_offset: Decimal = Decimal("0")

    # --- v2 additions --------------------------------------------------
    # Earnings decomposition plus a YTD snapshot, needed for accurate
    # cash-vs-RSU tax attribution. Everything here defaults to 0 or None so
    # that v1 extractor output still validates.
    salary: Decimal = Decimal("0")
    bonus: Decimal = Decimal("0")
    # Employee-side salary-sacrifice contribution, i.e. the absolute value of
    # the negative "AE Pension EE" row in the Payments block; it reduces gross
    # before PAYE. pension_employee remains reserved for the rare payslip that
    # posts pension as a positive Deduction.
    pension_sacrifice: Decimal = Decimal("0")
    # Post-sacrifice Taxable Pay (the PAYE base) = gross_pay + rsu_vest.
    # None for variant A (pre-2022) payslips, which do not surface the
    # summary block.
    # NOTE(review): this formula treats gross_pay as excluding rsu_vest, while
    # the cash-only gross formula on rsu_vest treats it as including it —
    # confirm which convention applies to which template.
    taxable_pay: Decimal | None = None
    # Year-to-date figures read from the summary block; they feed the
    # ytd-effective-tax-rate formula used by the dashboard.
    ytd_tax_paid: Decimal | None = None
    ytd_taxable_pay: Decimal | None = None
    ytd_gross: Decimal | None = None
    # Derived pro-rata share of income_tax that belongs to cash pay:
    #   income_tax * (gross_pay - pension_sacrifice) / taxable_pay
    # None when taxable_pay is unavailable (variant A); in that case the full
    # income_tax is used as the fallback.
    cash_income_tax: Decimal | None = None
    # Year-to-date column values of the "RSU Tax Offset" and "RSU Excs Refund"
    # rows in the Payments block, captured for reconciliation against HMRC
    # annual figures (P60 + Individual Tax API).
    ytd_rsu_tax_offset: Decimal | None = None
    ytd_rsu_excs_refund: Decimal | None = None

    # --- Everything else -----------------------------------------------
    # Further deduction amounts by string key (presumably the payslip line
    # label — confirm against the extractor).
    other_deductions: dict[str, Decimal] = Field(default_factory=dict)
    net_pay: Decimal


class WebhookPayload(BaseModel):
    # Body accepted from the webhook: a single integer document id. Any other
    # key fails validation (extra="forbid").
    # NOTE(review): presumably the id of the document whose payslip should be
    # ingested — confirm against the webhook sender.
    model_config = ConfigDict(extra="forbid")

    document_id: int


def validate_totals(p: ExtractedPayslip) -> bool:
    """Check that gross - deductions ≈ net within a 2p tolerance.

    - Employer pension is excluded — it never leaves the employer's books.
    - `rsu_offset` is included as a deduction: it's the line that nets
      the RSU notional back out of cash pay on UK payslips with stock comp.
      The gross + rsu_vest inflation is offset by rsu_offset of equal size.
      Meta's template doesn't carry rsu_offset — the grossing happens via
      Taxable Pay and PAYE, so `gross_pay` already excludes the RSU uplift.
    """
    deductions = (p.income_tax + p.national_insurance + p.pension_employee + p.student_loan +
                  p.rsu_offset + sum(p.other_deductions.values(), start=Decimal("0")))
    diff = abs(p.gross_pay - deductions - p.net_pay)
    return diff < TOTALS_TOLERANCE