extractor: track rsu_vest + rsu_offset separately from cash pay

UK payslips for equity-comp employees report RSU vests as notional pay
for HMRC only. A paired same-magnitude deduction (Shares Retained /
Stock Tax Withholding / RSU Offset) nets it back out of cash. The UK
payslip's income_tax line shows tax on the grossed-up total, but the
actual RSU tax is handled by Schwab (US broker) via share sale. No
cash flows through UK payroll for RSU.

Previously the extractor folded the RSU notional into gross_pay and
income_tax, which inflated the dashboard numbers — a payslip with a
£25k RSU vest looked like 2x salary with an 80% tax rate.

Changes:
- schema: add rsu_vest + rsu_offset fields (default 0).
- db + alembic 0002: add two new NUMERIC(12,2) columns with server
  default 0 (backward-compatible; existing rows get 0).
- validate_totals: include rsu_offset in the deductions sum so the
  rsu_vest inflation of gross_pay is properly netted out.
- extraction prompt: explicit rules for identifying RSU lines by the
  common Meta/Sage/Workday labels, and to NOT put them in
  other_deductions.

Dashboards in a follow-up commit: cash_gross = gross_pay - rsu_vest,
effective tax rate based on cash metrics.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-18 23:37:25 +00:00
parent 86cac65572
commit 9105b6b79d
6 changed files with 72 additions and 3 deletions

View file

@ -0,0 +1,33 @@
"""Add rsu_vest and rsu_offset columns.
UK payslips for Meta report RSU vests as notional pay (gross inflation)
and offset them via a same-magnitude deduction. The cash gross Viktor
cares about for dashboarding is gross_pay - rsu_vest. Track both for
reporting and exactness; the cash and tax-rate charts compute from them.
"""
import sqlalchemy as sa
from alembic import op
# Alembic revision identifiers: `revision` names this migration and
# `down_revision` names the migration it is applied on top of.
revision = "0002_add_rsu_columns"
down_revision = "0001_initial"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the `rsu_vest` and `rsu_offset` columns to `payslip_ingest.payslip`.

    Both columns are NOT NULL NUMERIC(12, 2) with a server-side default of 0.
    """
    # Same definition for both columns; only the name differs.
    for column_name in ("rsu_vest", "rsu_offset"):
        rsu_column = sa.Column(
            column_name,
            sa.Numeric(12, 2),
            nullable=False,
            server_default=sa.text("0"),
        )
        op.add_column("payslip", rsu_column, schema="payslip_ingest")
def downgrade() -> None:
    """Drop the `rsu_offset` and `rsu_vest` columns from `payslip_ingest.payslip`."""
    # `rsu_offset` is dropped first, then `rsu_vest`.
    for column_name in ("rsu_offset", "rsu_vest"):
        op.drop_column("payslip", column_name, schema="payslip_ingest")

View file

@ -46,6 +46,12 @@ class Payslip(Base):
student_loan: Mapped[Decimal] = mapped_column(Numeric(12, 2),
nullable=False,
server_default=text("0"))
rsu_vest: Mapped[Decimal] = mapped_column(Numeric(12, 2),
nullable=False,
server_default=text("0"))
rsu_offset: Mapped[Decimal] = mapped_column(Numeric(12, 2),
nullable=False,
server_default=text("0"))
other_deductions: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)
net_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
tax_year: Mapped[str] = mapped_column(String, nullable=False)

View file

@ -30,6 +30,8 @@ EXTRACTION_PROMPT = (
' "pension_employee": number,\n'
' "pension_employer": number,\n'
' "student_loan": number,\n'
' "rsu_vest": number,\n'
' "rsu_offset": number,\n'
' "other_deductions": {"label": number, ...},\n'
' "net_pay": number\n'
"}\n"
@ -37,8 +39,19 @@ EXTRACTION_PROMPT = (
"Rules:\n"
"- Report numbers as the payslip shows them; do not compute sums.\n"
"- Unknown numeric fields → 0, not null.\n"
"- `rsu_vest`: any notional/reporting entry in the EARNINGS block labelled "
'"RSU Vest", "Restricted Stock Units", "Stock Value", "Notional Pay", '
'"Share Award", "Equity Vest", "GSU Vest". For Meta UK payslips this is '
"the grossed-up RSU value reported for HMRC only; Schwab handles actual "
"tax withholding via share sale.\n"
"- `rsu_offset`: the matching DEDUCTION that nets the RSU out of cash pay — "
'labels vary: "Shares Retained", "Stock Tax Withholding", "RSU Offset", '
'"Notional Pay Offset", "Shares Withheld". For Meta this is typically equal '
"in magnitude to rsu_vest so cash net is unaffected.\n"
"- If either rsu_vest or rsu_offset is present, BOTH should be populated; "
"do NOT put them in `other_deductions`.\n"
"- `other_deductions` covers cycle-to-work, share-save, benefits-in-kind, court orders, "
"anything not in the main fields.\n"
"anything not in the main fields (and NOT RSU — those have dedicated fields).\n"
"- All money in GBP unless the payslip is denominated otherwise.\n"
'- If a field\'s value is ambiguous, pick the value from the "this period" column, not YTD.')

View file

@ -107,6 +107,8 @@ async def _insert_payslip(
pension_employee=extracted.pension_employee,
pension_employer=extracted.pension_employer,
student_loan=extracted.student_loan,
rsu_vest=extracted.rsu_vest,
rsu_offset=extracted.rsu_offset,
other_deductions=_decimals_to_float(extracted.other_deductions),
net_pay=extracted.net_pay,
tax_year=derive_tax_year(extracted.pay_date),

View file

@ -20,6 +20,16 @@ class ExtractedPayslip(BaseModel):
pension_employee: Decimal = Field(default=Decimal("0"))
pension_employer: Decimal = Field(default=Decimal("0"))
student_loan: Decimal = Field(default=Decimal("0"))
# RSU vest reported on the UK payslip is notional — the share grant is
# handled by Schwab which withholds US-side tax by selling shares. The
# UK payslip only lists it for HMRC reporting; no cash flows through
# UK payroll. Track it separately so dashboards can derive cash-only
# gross = gross_pay - rsu_vest.
rsu_vest: Decimal = Field(default=Decimal("0"))
# Corresponding offset deduction that nets the RSU out of cash pay on the
# UK slip (labels vary: "Shares Retained", "Stock Tax Withholding",
# "RSU Offset", "Notional Pay Offset"). Same as rsu_vest in magnitude.
rsu_offset: Decimal = Field(default=Decimal("0"))
other_deductions: dict[str, Decimal] = Field(default_factory=dict)
net_pay: Decimal
@ -33,10 +43,13 @@ class WebhookPayload(BaseModel):
def validate_totals(p: ExtractedPayslip) -> bool:
    """Return True when gross pay minus employee deductions matches net pay.

    The deductions counted are income tax, national insurance, employee
    pension, student loan, `rsu_offset` and every entry in
    `other_deductions`. The check passes when the absolute difference between
    `gross_pay - deductions` and `net_pay` is strictly below
    `TOTALS_TOLERANCE` (described as a 2p tolerance).

    - `pension_employer` is deliberately left out: it never leaves the
      employer's books, so it does not affect take-home pay arithmetic.
    - `rsu_offset` is counted as a deduction: on UK payslips with stock comp
      it is the line that nets the notional RSU value back out of cash pay,
      cancelling the `rsu_vest` amount reported inside gross pay.
    """
    # Free-form deductions; the Decimal start value keeps an empty dict
    # summing to Decimal("0") rather than int 0.
    itemised = sum(p.other_deductions.values(), start=Decimal("0"))
    fixed = (
        p.income_tax
        + p.national_insurance
        + p.pension_employee
        + p.student_loan
        + p.rsu_offset
    )
    expected_net = p.gross_pay - (fixed + itemised)
    return abs(expected_net - p.net_pay) < TOTALS_TOLERANCE

View file

@ -22,6 +22,8 @@ def _sample_extraction() -> ExtractedPayslip:
pension_employee=Decimal("250.00"),
pension_employer=Decimal("150.00"),
student_loan=Decimal("100.00"),
rsu_vest=Decimal("0.00"),
rsu_offset=Decimal("0.00"),
other_deductions={"cycle_to_work": Decimal("50.00")},
net_pay=Decimal("3450.00"),
)