payslip-ingest/payslip_ingest/db.py
Viktor Barzin 3a62a38069 rsu_vest_events: schema + ORM for Schwab vest ground truth (Phase D)
Migration 0008 + ORM model for payslip_ingest.rsu_vest_events.

Purpose: broker-sync (separate repo) will parse Schwab "Release
Confirmation" emails and populate this table, enabling Panel 15 of
the UK payslip dashboard to reconcile:
  payslip.rsu_vest   ↔ SUM(rsu_vest_events.gross_value_gbp)
  RSU-attributed PAYE ↔ SUM(rsu_vest_events.tax_withheld_gbp)

Schema carries both the raw USD figures (fmv_at_vest_usd,
tax_withheld_usd, shares_*) and the GBP-translated values
(gross_value_gbp, tax_withheld_gbp) plus the FX rate used — the
dashboard joins on GBP, audits keep USD.

Idempotent on `external_id` — broker-sync emits a stable
`schwab:{date}:{ticker}:VEST:{shares_vested}` for each vest event.

The broker-sync postgres sink that writes here is pending a real email
fixture (current parser is heuristic-only) and a cross-service DB grant
for broker-sync's K8s ServiceAccount. Follow-up under code-860.

Part of: code-860
2026-04-19 18:27:41 +00:00

162 lines
9.2 KiB
Python

import os
from datetime import date, datetime
from decimal import Decimal
from typing import Any
from sqlalchemy import JSON, TIMESTAMP, Boolean, Date, Integer, Numeric, String, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
# Database schema name that every model in this module passes via its
# `__table_args__ = {"schema": SCHEMA_NAME}`.
SCHEMA_NAME = "payslip_ingest"
class Base(DeclarativeBase):
    """Declarative base class that all ORM models in this module inherit from."""
# Column type for JSON payloads: JSONB by default (Postgres), swapped for the
# generic JSON type when the dialect is "sqlite". Tests use SQLite, prod uses
# Postgres.
JSON_TYPE = JSONB().with_variant(JSON(), "sqlite")
class Payslip(Base):
    """Figures stored for one payslip, in the `payslip` table of `SCHEMA_NAME`.

    `paperless_doc_id` is unique, so a given source document maps to at most
    one row. Monetary columns are `Numeric(12, 2)`. `raw_extraction` is a
    required JSON payload (presumably the parser's unprocessed output —
    confirm against the writer).
    """

    __tablename__ = "payslip"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    # Row identity and the source document it was built from.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)

    # Dates, employer and currency (currency defaults to "GBP" server-side).
    pay_date: Mapped[date] = mapped_column(Date, nullable=False)
    pay_period_start: Mapped[date | None] = mapped_column(Date, nullable=True)
    pay_period_end: Mapped[date | None] = mapped_column(Date, nullable=True)
    employer: Mapped[str | None] = mapped_column(String, nullable=True)
    currency: Mapped[str] = mapped_column(String(3), nullable=False, server_default="GBP")

    # Required amount with no default: the writer must always supply it.
    gross_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)

    # NOT NULL amounts that the database fills with 0 when the INSERT omits them.
    income_tax: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    national_insurance: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    pension_employee: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    pension_employer: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    student_loan: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    rsu_vest: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    rsu_offset: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    salary: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    bonus: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )
    pension_sacrifice: Mapped[Decimal] = mapped_column(
        Numeric(12, 2), nullable=False, server_default=text("0")
    )

    # Nullable columns.
    taxable_pay: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_tax_paid: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_taxable_pay: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_gross: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    cash_income_tax: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    cash_income_tax_source: Mapped[str | None] = mapped_column(String(16), nullable=True)
    ytd_rsu_tax_offset: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_rsu_excs_refund: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    other_deductions: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)

    # Required totals, tax year and the JSON extraction payload.
    net_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    tax_year: Mapped[str] = mapped_column(String, nullable=False)
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)

    # Bookkeeping: `validated` defaults to true and `created_at` to now(), both server-side.
    validated: Mapped[bool] = mapped_column(
        Boolean, nullable=False, server_default=text("true")
    )
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), nullable=False, server_default=text("now()")
    )
class ExternalMetaDeposit(Base):
    """A Meta payroll deposit as ActualBudget recorded it.

    Serves as ground truth for `payslip.net_pay`. Rows are synced daily by a
    CronJob reading from the jhonderson/actual-http-api sidecar.

    `actualbudget_tx_id` is unique and is the idempotency key: the same
    transaction id from ActualBudget means the same deposit, so re-runs are
    no-ops. Deletions made in ActualBudget are not propagated to this table.
    """

    __tablename__ = "external_meta_deposits"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Idempotency key (unique).
    actualbudget_tx_id: Mapped[str] = mapped_column(String, unique=True, nullable=False)

    deposit_date: Mapped[date] = mapped_column(Date, nullable=False)
    amount: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    payee: Mapped[str | None] = mapped_column(String, nullable=True)
    memo: Mapped[str | None] = mapped_column(String, nullable=True)

    # Filled server-side with now() when the INSERT omits it.
    synced_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), nullable=False, server_default=text("now()")
    )
class RsuVestEvent(Base):
    """A single Schwab RSU vest, used as ground truth for `payslip.rsu_vest`.

    Each vest is one row. `external_id` is unique and stays stable across
    IMAP re-runs; its format is
    `schwab:{date}:{ticker}:VEST:{shares_vested}`. The USD → GBP conversion
    is done at write time with the daily ECB rate, so the row stores both the
    USD figures and the GBP-translated values together with the FX rate.
    """

    __tablename__ = "rsu_vest_events"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Idempotency key (unique).
    external_id: Mapped[str] = mapped_column(String, unique=True, nullable=False)

    vest_date: Mapped[date] = mapped_column(Date, nullable=False)
    ticker: Mapped[str] = mapped_column(String, nullable=False)

    # Share counts, stored to four decimal places.
    shares_vested: Mapped[Decimal] = mapped_column(Numeric(14, 4), nullable=False)
    shares_sold_to_cover: Mapped[Decimal | None] = mapped_column(
        Numeric(14, 4), nullable=True
    )

    # USD figures.
    fmv_at_vest_usd: Mapped[Decimal] = mapped_column(Numeric(12, 4), nullable=False)
    tax_withheld_usd: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)

    # FX rate and the GBP values; all three are nullable.
    fx_rate_gbp: Mapped[Decimal | None] = mapped_column(Numeric(10, 6), nullable=True)
    gross_value_gbp: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    tax_withheld_gbp: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)

    source: Mapped[str] = mapped_column(String(32), nullable=False)
    raw_extraction: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)

    # Filled server-side with now() when the INSERT omits it.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), nullable=False, server_default=text("now()")
    )
class P60Reference(Base):
    """Annual P60 issued by HMRC, intended as one row per (tax_year, employer).

    This is the source of truth for annual PAYE/NI: the dashboard reconciles
    `SUM(payslip_ingest.payslip)` against the figures HMRC actually holds,
    which surfaces both missing-month gaps and parser drift.

    NOTE(review): this model declares `paperless_doc_id` as unique and indexes
    `tax_year`, but declares no unique constraint on (tax_year, employer) —
    confirm whether the migration or the writer enforces that.
    """

    __tablename__ = "p60_reference"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Tax year (indexed) and employer details.
    tax_year: Mapped[str] = mapped_column(String, nullable=False, index=True)
    employer: Mapped[str] = mapped_column(String, nullable=False)
    employer_paye_ref: Mapped[str | None] = mapped_column(String, nullable=True)

    # Annual amounts; only student_loan may be NULL.
    gross_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    income_tax: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    national_insurance: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    student_loan: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)

    tax_code: Mapped[str | None] = mapped_column(String, nullable=True)

    # Source document (unique) and the required JSON extraction payload.
    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)

    # Filled server-side with now() when the INSERT omits it.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), nullable=False, server_default=text("now()")
    )
def create_engine_from_env() -> AsyncEngine:
    """Create the async engine from the `DB_CONNECTION_STRING` environment variable.

    Returns:
        An `AsyncEngine` created with `pool_pre_ping=True`.

    Raises:
        KeyError: if `DB_CONNECTION_STRING` is not set in the environment.
    """
    return create_async_engine(
        os.environ["DB_CONNECTION_STRING"],
        pool_pre_ping=True,
    )
def make_session_factory(engine: AsyncEngine) -> async_sessionmaker[Any]:
    """Return an `async_sessionmaker` bound to *engine*.

    The factory is configured with `expire_on_commit=False`.
    """
    session_factory = async_sessionmaker(bind=engine, expire_on_commit=False)
    return session_factory