UK payslips for equity-comp employees report RSU vests as notional pay for HMRC only. A paired same-magnitude deduction (Shares Retained / Stock Tax Withholding / RSU Offset) nets it back out of cash. The UK payslip's income_tax line shows tax on the grossed-up total, but the actual RSU tax is handled by Schwab (US broker) via share sale. No cash flows through UK payroll for RSUs.

Previously the extractor folded the RSU notional amount into gross_pay and income_tax, which inflated the dashboard numbers — a payslip with a £25k RSU vest looked like 2x salary with an 80% tax rate.

Changes:
- schema: add rsu_vest + rsu_offset fields (default 0).
- db + alembic 0002: add two new NUMERIC(12,2) columns with server default 0 (backward-compatible; existing rows get 0).
- validate_totals: include rsu_offset in the deductions sum so the gross + rsu_vest inflation is properly netted out.
- extraction prompt: explicit rules for identifying RSU lines by the common Meta/Sage/Workday labels, and to NOT put them in other_deductions.

Dashboards in a follow-up commit: cash_gross = gross_pay - rsu_vest, effective tax rate based on cash metrics.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
71 lines
3.7 KiB
Python
71 lines
3.7 KiB
Python
import os
from datetime import date, datetime
from decimal import Decimal
from typing import Any

from sqlalchemy import JSON, TIMESTAMP, Boolean, Date, Integer, Numeric, String, func, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
|
|
# Database schema that the tables declared in this module live in (used in ``__table_args__``).
SCHEMA_NAME = "payslip_ingest"
|
|
|
|
|
|
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
|
|
|
|
|
|
# Column type for JSON payloads. Prod runs on Postgres and gets JSONB; the tests run
# on SQLite, where the generic JSON type (stored as text) is substituted instead.
JSON_TYPE = JSONB().with_variant(JSON(), "sqlite")
|
|
|
|
|
|
class Payslip(Base):
    """One extracted payslip, stored as a row of the ``payslip`` table in ``SCHEMA_NAME``.

    Monetary amounts are ``NUMERIC(12, 2)`` and mapped to ``Decimal``. Apart from
    ``gross_pay`` and ``net_pay`` (which must always be supplied), every monetary
    column has a server default of ``0`` so rows written before a column existed
    read back as zero.

    ``rsu_vest`` holds the RSU vest amount that the payslip reports as notional
    pay, and ``rsu_offset`` holds the paired deduction that nets that amount back
    out of cash pay. Both are kept apart from the other pay and deduction columns
    so cash figures can be derived without the RSU inflation.
    """

    __tablename__ = "payslip"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Id of the source document; the unique constraint allows one row per document.
    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)

    # Dates: only the pay date is mandatory, the period bounds are optional.
    pay_date: Mapped[date] = mapped_column(Date, nullable=False)
    pay_period_start: Mapped[date | None] = mapped_column(Date, nullable=True)
    pay_period_end: Mapped[date | None] = mapped_column(Date, nullable=True)

    employer: Mapped[str | None] = mapped_column(String, nullable=True)
    # Three-character currency code; the database fills in "GBP" when omitted.
    currency: Mapped[str] = mapped_column(String(3), nullable=False, server_default="GBP")

    # Pay and deduction amounts.
    gross_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    income_tax: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    national_insurance: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    pension_employee: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    pension_employer: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    student_loan: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    # RSU vest reported as notional pay, and the matching deduction that cancels it.
    rsu_vest: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    rsu_offset: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    # Free-form JSON object for deductions without a dedicated column; may be NULL.
    other_deductions: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)
    net_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)

    tax_year: Mapped[str] = mapped_column(String, nullable=False)
    # The extraction output kept as JSON alongside the typed columns (required).
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)
    validated: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default=text("true"))
    # func.now() instead of text("now()"): it still renders ``now()`` on Postgres, but
    # compiles to CURRENT_TIMESTAMP on SQLite, which has no ``now()`` function.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
    )
|
|
|
|
|
|
def create_engine_from_env() -> AsyncEngine:
    """Build the async engine from the ``DB_CONNECTION_STRING`` environment variable.

    The engine is created with ``pool_pre_ping=True``.

    Raises:
        KeyError: if ``DB_CONNECTION_STRING`` is not set in the environment.
    """
    return create_async_engine(os.environ["DB_CONNECTION_STRING"], pool_pre_ping=True)
|
|
|
|
|
|
def make_session_factory(engine: AsyncEngine) -> async_sessionmaker[Any]:
    """Return a session factory bound to *engine*.

    The factory is configured with ``expire_on_commit=False``.
    """
    session_factory = async_sessionmaker(bind=engine, expire_on_commit=False)
    return session_factory
|