"""SQLAlchemy ORM models and engine/session helpers for the ``payslip_ingest`` schema."""

import os
from datetime import date, datetime
from decimal import Decimal
from typing import Any

from sqlalchemy import JSON, TIMESTAMP, Boolean, Date, Integer, Numeric, String, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

SCHEMA_NAME = "payslip_ingest"


class Base(DeclarativeBase):
    """Declarative base shared by every model in this module."""


# Postgres (prod) stores these columns as JSONB; SQLite (tests) falls back to
# plain JSON, which it keeps as text.
JSON_TYPE = JSONB().with_variant(JSON(), "sqlite")


# --- Column factories -------------------------------------------------------
# Each call builds a fresh ``mapped_column`` so that no column object is ever
# shared between two attributes or two tables.


def _surrogate_key() -> Mapped[int]:
    """Return an auto-incrementing integer primary-key column."""
    return mapped_column(Integer, primary_key=True, autoincrement=True)


def _money() -> Mapped[Decimal]:
    """Return a NOT NULL ``NUMERIC(12, 2)`` column with no server default."""
    return mapped_column(Numeric(12, 2), nullable=False)


def _money_default_zero() -> Mapped[Decimal]:
    """Return a NOT NULL ``NUMERIC(12, 2)`` column whose server default is ``0``."""
    return mapped_column(Numeric(12, 2), nullable=False, server_default=text("0"))


def _money_nullable() -> Mapped[Decimal | None]:
    """Return a nullable ``NUMERIC(12, 2)`` column."""
    return mapped_column(Numeric(12, 2), nullable=True)


def _server_timestamp() -> Mapped[datetime]:
    """Return a NOT NULL timezone-aware timestamp column defaulting to ``now()``."""
    return mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=text("now()"),
    )


# --- Models -----------------------------------------------------------------


class Payslip(Base):
    """One extracted payslip, unique per ``paperless_doc_id``.

    Most deduction/earning amounts are NOT NULL and default to ``0`` on the
    server; the taxable/YTD/cash-tax figures are nullable.
    """

    __tablename__ = "payslip"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = _surrogate_key()
    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)

    # Pay dates and employer.
    pay_date: Mapped[date] = mapped_column(Date, nullable=False)
    pay_period_start: Mapped[date | None] = mapped_column(Date, nullable=True)
    pay_period_end: Mapped[date | None] = mapped_column(Date, nullable=True)
    employer: Mapped[str | None] = mapped_column(String, nullable=True)
    currency: Mapped[str] = mapped_column(String(3), nullable=False, server_default="GBP")

    # Required amounts; everything except gross_pay defaults to 0 server-side.
    gross_pay: Mapped[Decimal] = _money()
    income_tax: Mapped[Decimal] = _money_default_zero()
    national_insurance: Mapped[Decimal] = _money_default_zero()
    pension_employee: Mapped[Decimal] = _money_default_zero()
    pension_employer: Mapped[Decimal] = _money_default_zero()
    student_loan: Mapped[Decimal] = _money_default_zero()
    rsu_vest: Mapped[Decimal] = _money_default_zero()
    rsu_offset: Mapped[Decimal] = _money_default_zero()
    salary: Mapped[Decimal] = _money_default_zero()
    bonus: Mapped[Decimal] = _money_default_zero()
    pension_sacrifice: Mapped[Decimal] = _money_default_zero()

    # Optional figures.
    taxable_pay: Mapped[Decimal | None] = _money_nullable()
    ytd_tax_paid: Mapped[Decimal | None] = _money_nullable()
    ytd_taxable_pay: Mapped[Decimal | None] = _money_nullable()
    ytd_gross: Mapped[Decimal | None] = _money_nullable()
    cash_income_tax: Mapped[Decimal | None] = _money_nullable()
    cash_income_tax_source: Mapped[str | None] = mapped_column(String(16), nullable=True)
    ytd_rsu_tax_offset: Mapped[Decimal | None] = _money_nullable()
    ytd_rsu_excs_refund: Mapped[Decimal | None] = _money_nullable()
    other_deductions: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)

    net_pay: Mapped[Decimal] = _money()
    tax_year: Mapped[str] = mapped_column(String, nullable=False)

    # Extraction payload and bookkeeping.
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)
    validated: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        server_default=text("true"),
    )
    created_at: Mapped[datetime] = _server_timestamp()


class ExternalMetaDeposit(Base):
    """Meta payroll deposit as recorded by ActualBudget.

    Serves as ground truth against `payslip.net_pay`. A daily CronJob syncs
    these rows by reading from the jhonderson/actual-http-api sidecar. The
    sync is idempotent on `actualbudget_tx_id`: the same transaction id from
    AB means the same deposit, so re-runs are no-ops. Deletions in AB are not
    propagated.
    """

    __tablename__ = "external_meta_deposits"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = _surrogate_key()
    actualbudget_tx_id: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    deposit_date: Mapped[date] = mapped_column(Date, nullable=False)
    amount: Mapped[Decimal] = _money()
    payee: Mapped[str | None] = mapped_column(String, nullable=True)
    memo: Mapped[str | None] = mapped_column(String, nullable=True)
    synced_at: Mapped[datetime] = _server_timestamp()


class RsuVestEvent(Base):
    """Schwab RSU vest event, one row per vest.

    Serves as ground truth against payslip.rsu_vest. `external_id` stays
    stable across IMAP re-runs
    (`schwab:{date}:{ticker}:VEST:{shares_vested}`). USD → GBP conversion is
    done at write time using the daily ECB rate.
    """

    __tablename__ = "rsu_vest_events"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = _surrogate_key()
    external_id: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    vest_date: Mapped[date] = mapped_column(Date, nullable=False)
    ticker: Mapped[str] = mapped_column(String, nullable=False)

    # Share counts are stored with four decimal places.
    shares_vested: Mapped[Decimal] = mapped_column(Numeric(14, 4), nullable=False)
    shares_sold_to_cover: Mapped[Decimal | None] = mapped_column(Numeric(14, 4), nullable=True)

    # USD-side figures.
    fmv_at_vest_usd: Mapped[Decimal] = mapped_column(Numeric(12, 4), nullable=False)
    tax_withheld_usd: Mapped[Decimal | None] = _money_nullable()

    # GBP-side figures and the rate stored alongside them.
    fx_rate_gbp: Mapped[Decimal | None] = mapped_column(Numeric(10, 6), nullable=True)
    gross_value_gbp: Mapped[Decimal | None] = _money_nullable()
    tax_withheld_gbp: Mapped[Decimal | None] = _money_nullable()

    source: Mapped[str] = mapped_column(String(32), nullable=False)
    raw_extraction: Mapped[dict[str, Any] | None] = mapped_column(JSON_TYPE, nullable=True)
    created_at: Mapped[datetime] = _server_timestamp()


class P60Reference(Base):
    """HMRC-issued annual P60, one row per (tax_year, employer).

    Source of truth for annual PAYE/NI. It lets the dashboard reconcile
    `SUM(payslip_ingest.payslip)` against the figures HMRC actually has on
    file, catching both missing-month gaps and parser drift.
    """

    __tablename__ = "p60_reference"
    __table_args__ = {"schema": SCHEMA_NAME}  # noqa: RUF012

    id: Mapped[int] = _surrogate_key()
    tax_year: Mapped[str] = mapped_column(String, nullable=False, index=True)
    employer: Mapped[str] = mapped_column(String, nullable=False)
    employer_paye_ref: Mapped[str | None] = mapped_column(String, nullable=True)

    gross_pay: Mapped[Decimal] = _money()
    income_tax: Mapped[Decimal] = _money()
    national_insurance: Mapped[Decimal] = _money()
    student_loan: Mapped[Decimal | None] = _money_nullable()
    tax_code: Mapped[str | None] = mapped_column(String, nullable=True)

    paperless_doc_id: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)
    raw_extraction: Mapped[dict[str, Any]] = mapped_column(JSON_TYPE, nullable=False)
    created_at: Mapped[datetime] = _server_timestamp()


# --- Engine / session helpers -----------------------------------------------


def create_engine_from_env() -> AsyncEngine:
    """Create the async engine from the ``DB_CONNECTION_STRING`` environment variable.

    The engine is created with ``pool_pre_ping=True``.

    Raises:
        KeyError: If ``DB_CONNECTION_STRING`` is not set.
    """
    return create_async_engine(os.environ["DB_CONNECTION_STRING"], pool_pre_ping=True)


def make_session_factory(engine: AsyncEngine) -> async_sessionmaker[Any]:
    """Return an ``async_sessionmaker`` bound to *engine*.

    The factory is configured with ``expire_on_commit=False``.
    """
    session_factory = async_sessionmaker(engine, expire_on_commit=False)
    return session_factory