64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
|
|
"""Add p60_reference table for HMRC annual ground-truth reconciliation.
|
||
|
|
|
||
|
|
P60 is the authoritative end-of-year certificate HMRC issues; its figures
|
||
|
|
match what HMRC has on file. Storing one row per (tax_year, employer) lets
|
||
|
|
the dashboard compare `SUM(payslip)` against the P60 totals and surface
|
||
|
|
missing-month gaps or parser drift.
|
||
|
|
|
||
|
|
Columns mirror what the P60 explicitly prints; everything derived (effective
|
||
|
|
rate, deltas) stays in the dashboard SQL. `paperless_doc_id` is unique so
|
||
|
|
re-uploading the same PDF is idempotent. `raw_extraction` keeps the full
|
||
|
|
parsed dict for debugging parser regressions.
|
||
|
|
"""
|
||
|
|
import sqlalchemy as sa
|
||
|
|
from sqlalchemy.dialects import postgresql
|
||
|
|
|
||
|
|
from alembic import op
|
||
|
|
|
||
|
|
# Alembic revision identifiers: this migration is "0005" and applies on top of "0004".
revision = "0005"
down_revision = "0004"
branch_labels = None
depends_on = None

# Database schema that upgrade() creates p60_reference in and downgrade() drops it from.
SCHEMA = "payslip_ingest"
|
||
|
|
|
||
|
|
|
||
|
|
def upgrade() -> None:
    """Create the ``p60_reference`` table and its ``tax_year`` index.

    The table lives in ``SCHEMA`` and holds one row per (tax_year, employer),
    enforced by ``uq_p60_tax_year_employer``. ``paperless_doc_id`` is unique,
    so the same source document cannot be stored twice.
    """
    op.create_table(
        "p60_reference",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("tax_year", sa.String(), nullable=False),
        sa.Column("employer", sa.String(), nullable=False),
        sa.Column("employer_paye_ref", sa.String(), nullable=True),
        sa.Column("gross_pay", sa.Numeric(12, 2), nullable=False),
        sa.Column("income_tax", sa.Numeric(12, 2), nullable=False),
        sa.Column("national_insurance", sa.Numeric(12, 2), nullable=False),
        sa.Column("student_loan", sa.Numeric(12, 2), nullable=True),
        sa.Column("tax_code", sa.String(), nullable=True),
        sa.Column("paperless_doc_id", sa.Integer(), nullable=False, unique=True),
        sa.Column(
            "raw_extraction",
            # JSONB on PostgreSQL; plain JSON when the migration runs on SQLite.
            postgresql.JSONB().with_variant(sa.JSON(), "sqlite"),
            nullable=False,
        ),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(timezone=True),
            nullable=False,
            # func.now() is compiled per dialect (CURRENT_TIMESTAMP on SQLite),
            # whereas the literal text "now()" is not a valid SQLite column
            # default. This keeps the default usable on the same SQLite
            # backend that raw_extraction's variant targets; the PostgreSQL
            # default is equivalent.
            server_default=sa.func.now(),
        ),
        sa.UniqueConstraint("tax_year", "employer", name="uq_p60_tax_year_employer"),
        schema=SCHEMA,
    )
    # NOTE(review): tax_year is also the leading column of
    # uq_p60_tax_year_employer, so this index may be redundant on PostgreSQL.
    # It is kept because downgrade() drops it by name.
    op.create_index(
        "ix_p60_reference_tax_year",
        "p60_reference",
        ["tax_year"],
        schema=SCHEMA,
    )
|
||
|
|
|
||
|
|
|
||
|
|
def downgrade() -> None:
    """Reverse ``upgrade()``: drop the ``tax_year`` index, then the table."""
    table = "p60_reference"

    # The index is removed first, while the table it belongs to still exists.
    op.drop_index(
        "ix_p60_reference_tax_year",
        table_name=table,
        schema=SCHEMA,
    )
    op.drop_table(table, schema=SCHEMA)
|