# payslip-ingest/payslip_ingest/db.py

import os
from datetime import date, datetime
from decimal import Decimal
from typing import Any

from sqlalchemy import JSON, TIMESTAMP, Boolean, Date, Integer, Numeric, String, func, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.asyncio import (
    AsyncEngine,
    AsyncSession,
    async_sessionmaker,
    create_async_engine,
)
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

# Database schema name that every model in this module passes as
# `__table_args__["schema"]`.
SCHEMA_NAME = "payslip_ingest"


class Base(DeclarativeBase):
    """Shared declarative base class that the ORM models in this module subclass."""


# Column type used for JSON payload columns: JSONB on Postgres, with a plain
# JSON (as text) variant selected when the dialect is SQLite — tests use
# SQLite, prod uses Postgres.
JSON_TYPE = JSONB().with_variant(JSON(), "sqlite")


class Payslip(Base):
    """One extracted payslip, stored in the `payslip` table of the ingest schema.

    `paperless_doc_id` is unique, so a given source document id maps to at most
    one row. All monetary columns are `Numeric(12, 2)`. The per-component
    columns (tax, NI, pension, RSU, salary, bonus, ...) are NOT NULL with a
    server-side default of 0; the taxable-pay, year-to-date and cash-tax
    columns are nullable.
    """
    __tablename__ = "payslip"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    # Identity and document metadata.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)
    pay_date: Mapped[date] = mapped_column(Date, nullable=False)
    pay_period_start: Mapped[date | None] = mapped_column(Date, nullable=True)
    pay_period_end: Mapped[date | None] = mapped_column(Date, nullable=True)
    employer: Mapped[str | None] = mapped_column(String, nullable=True)
    # Three-character currency code; the database fills in "GBP" when omitted.
    currency: Mapped[str] = mapped_column(String(3), nullable=False, server_default="GBP")

    # Required headline figure (no server default).
    gross_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)

    # Component amounts: NOT NULL, defaulting to 0 on the server when omitted.
    income_tax: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                nullable=False,
                                                server_default=text("0"))
    national_insurance: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                        nullable=False,
                                                        server_default=text("0"))
    pension_employee: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                      nullable=False,
                                                      server_default=text("0"))
    pension_employer: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                      nullable=False,
                                                      server_default=text("0"))
    student_loan: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                  nullable=False,
                                                  server_default=text("0"))
    rsu_vest: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                              nullable=False,
                                              server_default=text("0"))
    rsu_offset: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                nullable=False,
                                                server_default=text("0"))
    salary: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                            nullable=False,
                                            server_default=text("0"))
    bonus: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False, server_default=text("0"))
    pension_sacrifice: Mapped[Decimal] = mapped_column(Numeric(12, 2),
                                                       nullable=False,
                                                       server_default=text("0"))

    # Optional figures: NULL when not available.
    taxable_pay: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_tax_paid: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_taxable_pay: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_gross: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    cash_income_tax: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    # Short (max 16 chars) tag stored alongside cash_income_tax.
    # NOTE(review): presumably records how cash_income_tax was derived — confirm.
    cash_income_tax_source: Mapped[str | None] = mapped_column(String(16), nullable=True)
    ytd_rsu_tax_offset: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    ytd_rsu_excs_refund: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    other_deductions: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)

    net_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    tax_year: Mapped[str] = mapped_column(String, nullable=False)
    # Required JSON payload.
    # NOTE(review): presumably the unprocessed extractor output — confirm.
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)
    validated: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default=text("true"))
    # func.now() instead of text("now()"): SQLAlchemy renders it as now() on
    # PostgreSQL and CURRENT_TIMESTAMP on SQLite, so the default is valid DDL on
    # both backends, whereas a literal "now()" is rejected by SQLite.
    created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True),
                                                 nullable=False,
                                                 server_default=func.now())


class ExternalMetaDeposit(Base):
    """Meta payroll deposit as recorded by ActualBudget — ground-truth against
    `payslip.net_pay`. Synced daily by a CronJob that reads from the
    jhonderson/actual-http-api sidecar.

    Idempotent on `actualbudget_tx_id` — same transaction id from AB means the
    same deposit, re-runs are no-ops. Deletions in AB are not propagated.
    """
    __tablename__ = "external_meta_deposits"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Unique key: at most one row per ActualBudget transaction id.
    actualbudget_tx_id: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    deposit_date: Mapped[date] = mapped_column(Date, nullable=False)
    amount: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    payee: Mapped[str | None] = mapped_column(String, nullable=True)
    memo: Mapped[str | None] = mapped_column(String, nullable=True)
    # func.now() instead of text("now()"): SQLAlchemy renders it as now() on
    # PostgreSQL and CURRENT_TIMESTAMP on SQLite, so the default is valid DDL on
    # both backends, whereas a literal "now()" is rejected by SQLite.
    synced_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True),
                                                nullable=False,
                                                server_default=func.now())


class RsuVestEvent(Base):
    """Schwab RSU vest event — ground truth against payslip.rsu_vest.

    One row per vest. `external_id` is stable across IMAP re-runs
    (`schwab:{date}:{ticker}:VEST:{shares_vested}`). USD → GBP conversion
    happens at write time using the daily ECB rate.
    """
    __tablename__ = "rsu_vest_events"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Unique key: at most one row per external_id.
    external_id: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    vest_date: Mapped[date] = mapped_column(Date, nullable=False)
    ticker: Mapped[str] = mapped_column(String, nullable=False)
    # Share counts are stored with four decimal places.
    shares_vested: Mapped[Decimal] = mapped_column(Numeric(14, 4), nullable=False)
    shares_sold_to_cover: Mapped[Decimal | None] = mapped_column(Numeric(14, 4), nullable=True)
    fmv_at_vest_usd: Mapped[Decimal] = mapped_column(Numeric(12, 4), nullable=False)
    tax_withheld_usd: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    # The FX rate and the GBP figures are nullable.
    fx_rate_gbp: Mapped[Decimal | None] = mapped_column(Numeric(10, 6), nullable=True)
    gross_value_gbp: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    tax_withheld_gbp: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    source: Mapped[str] = mapped_column(String(32), nullable=False)
    raw_extraction: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)
    # func.now() instead of text("now()"): SQLAlchemy renders it as now() on
    # PostgreSQL and CURRENT_TIMESTAMP on SQLite, so the default is valid DDL on
    # both backends, whereas a literal "now()" is rejected by SQLite.
    created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True),
                                                 nullable=False,
                                                 server_default=func.now())


class P60Reference(Base):
    """HMRC-issued annual P60. One row per (tax_year, employer).

    Source of truth for annual PAYE/NI — lets the dashboard reconcile
    `SUM(payslip_ingest.payslip)` against the figures HMRC actually has on
    file, catching both missing-month gaps and parser drift.

    NOTE(review): this model declares no unique constraint on
    (tax_year, employer); only `paperless_doc_id` is unique here. Confirm
    whether the one-row-per-pair rule is enforced elsewhere.
    """
    __tablename__ = "p60_reference"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Indexed (non-unique).
    tax_year: Mapped[str] = mapped_column(String, nullable=False, index=True)
    employer: Mapped[str] = mapped_column(String, nullable=False)
    employer_paye_ref: Mapped[str | None] = mapped_column(String, nullable=True)
    gross_pay: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    income_tax: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    national_insurance: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
    student_loan: Mapped[Decimal | None] = mapped_column(Numeric(12, 2), nullable=True)
    tax_code: Mapped[str | None] = mapped_column(String, nullable=True)
    # Unique key: at most one P60 row per source document id.
    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)
    # func.now() instead of text("now()"): SQLAlchemy renders it as now() on
    # PostgreSQL and CURRENT_TIMESTAMP on SQLite, so the default is valid DDL on
    # both backends, whereas a literal "now()" is rejected by SQLite.
    created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True),
                                                 nullable=False,
                                                 server_default=func.now())


def create_engine_from_env() -> AsyncEngine:
    """Create an async engine from the ``DB_CONNECTION_STRING`` environment variable.

    The engine is created with ``pool_pre_ping=True``.

    Raises:
        KeyError: if ``DB_CONNECTION_STRING`` is not set in the environment.
    """
    connection_string = os.environ["DB_CONNECTION_STRING"]
    engine = create_async_engine(connection_string, pool_pre_ping=True)
    return engine


def make_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """Return an async session factory bound to *engine*.

    The factory is configured with ``expire_on_commit=False``, so attributes
    already loaded on ORM objects are not expired when a session commits.

    Args:
        engine: The async engine that sessions produced by the factory use.

    Returns:
        An ``async_sessionmaker`` that produces ``AsyncSession`` instances.
    """
    return async_sessionmaker(engine, expire_on_commit=False)