Initial commit: event-driven UK payslip ingest service

Extracted from /home/wizard/code monorepo into its own repo so Woodpecker CI
can watch it. Identical content to /home/wizard/code commit e426028.

See README.md for overview, env vars, and Paperless workflow config.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-18 22:10:23 +00:00
commit 57484619c1
27 changed files with 2878 additions and 0 deletions

57
alembic/env.py Normal file
View file

@ -0,0 +1,57 @@
import asyncio
import os
from logging.config import fileConfig
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from alembic import context
from payslip_ingest.db import SCHEMA_NAME, Base
# Alembic Config object for the current run; exposes the values of the
# .ini file in use.
config = context.config

# Configure Python logging from the .ini file when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# DB_CONNECTION_STRING, when set and non-empty, overrides the
# "sqlalchemy.url" main option.
db_url = os.environ.get("DB_CONNECTION_STRING")
if db_url:
    # set_main_option() stores the value through ConfigParser, which treats
    # "%" as interpolation syntax. A raw "%" (e.g. from a URL-encoded
    # password) must be doubled or reading the option back raises an
    # interpolation error. Reads via get_main_option()/get_section()
    # collapse "%%" back to "%".
    config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%"))

# Model metadata handed to context.configure() as target_metadata.
target_metadata = Base.metadata
def do_run_migrations(connection: Connection) -> None:
    """Run the migrations synchronously on an already-open connection.

    Args:
        connection: Synchronous SQLAlchemy connection the migrations and
            Alembic's version table bookkeeping are executed on.
    """
    # Local imports keep this function self-contained.
    from sqlalchemy import inspect
    from sqlalchemy.schema import CreateSchema

    # Alembic creates its version table inside SCHEMA_NAME *before* the first
    # revision runs, so the schema has to exist up front. Otherwise a fresh
    # database fails before any revision can create the schema itself.
    if SCHEMA_NAME:
        if SCHEMA_NAME not in inspect(connection).get_schema_names():
            connection.execute(CreateSchema(SCHEMA_NAME))
        # End the transaction that the statements above implicitly opened so
        # that Alembic starts and commits its own transaction below.
        connection.commit()

    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        version_table_schema=SCHEMA_NAME,
        include_schemas=True,
    )

    with context.begin_transaction():
        context.run_migrations()
async def run_migrations_online() -> None:
    """Run the migrations against a live database through an async engine.

    The engine is built from the ``sqlalchemy.``-prefixed options of the
    main .ini section. The migration work itself is synchronous and is
    driven through ``run_sync`` on the async connection.
    """
    configuration = config.get_section(config.config_ini_section, {})
    connectable = async_engine_from_config(configuration, prefix="sqlalchemy.")

    try:
        async with connectable.connect() as connection:
            await connection.run_sync(do_run_migrations)
    finally:
        # Dispose of the engine even when a migration raises, so pooled
        # connections are released before the error propagates.
        await connectable.dispose()
def run_migrations_offline() -> None:
    """Run the migrations in offline mode, configured from a URL only.

    No connection is passed to ``context.configure``; the context is set up
    from the ``sqlalchemy.url`` main option with literal binds enabled.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "version_table_schema": SCHEMA_NAME,
        "include_schemas": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
# Module entry point: pick the runner that matches the mode Alembic was
# invoked in. The online runner is a coroutine, so it gets its own event loop.
if not context.is_offline_mode():
    asyncio.run(run_migrations_online())
else:
    run_migrations_offline()

25
alembic/script.py.mako Normal file
View file

@ -0,0 +1,25 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
## Generated revisions use collections.abc.Sequence and "X | Y" unions, the
## same annotation style as the hand-written initial revision.
from collections.abc import Sequence

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# Revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
## Merge revisions carry a tuple of parents, hence Sequence[str] here too.
down_revision: str | Sequence[str] | None = ${repr(down_revision)}
branch_labels: str | Sequence[str] | None = ${repr(branch_labels)}
depends_on: str | Sequence[str] | None = ${repr(depends_on)}


def upgrade() -> None:
    """Apply this revision."""
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    """Revert this revision."""
    ${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,72 @@
"""initial schema
Revision ID: 0001
Revises:
Create Date: 2026-04-18 00:00:00.000000
"""
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
# Revision identifiers, read by Alembic by name.
revision: str = "0001"
# No parent revision: this is the first migration in the chain.
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Database schema that holds every object this migration creates.
SCHEMA = "payslip_ingest"
def upgrade() -> None:
    """Create the schema, the ``payslip`` table and its two lookup indexes."""

    def amount(name: str, *, zero_default: bool = True) -> sa.Column:
        """Build a NOT NULL NUMERIC(12, 2) column, optionally defaulting to 0."""
        extra = {"server_default": sa.text("0")} if zero_default else {}
        return sa.Column(name, sa.Numeric(12, 2), nullable=False, **extra)

    op.execute(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA}")

    payslip_columns = [
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        # One row per source document: the document id is unique.
        sa.Column("paperless_doc_id", sa.Integer(), nullable=False, unique=True),
        sa.Column("pay_date", sa.Date(), nullable=False),
        sa.Column("pay_period_start", sa.Date(), nullable=True),
        sa.Column("pay_period_end", sa.Date(), nullable=True),
        sa.Column("employer", sa.Text(), nullable=True),
        sa.Column("currency", sa.CHAR(3), nullable=False, server_default="GBP"),
        # Gross and net pay are mandatory and carry no server default.
        amount("gross_pay", zero_default=False),
        amount("income_tax"),
        amount("national_insurance"),
        amount("pension_employee"),
        amount("pension_employer"),
        amount("student_loan"),
        sa.Column("other_deductions", postgresql.JSONB(), nullable=True),
        amount("net_pay", zero_default=False),
        sa.Column("tax_year", sa.Text(), nullable=False),
        sa.Column("raw_extraction", postgresql.JSONB(), nullable=False),
        sa.Column(
            "validated",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
        ),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
    ]
    op.create_table("payslip", *payslip_columns, schema=SCHEMA)

    indexed_columns = (
        ("idx_payslip_pay_date", "pay_date"),
        ("idx_payslip_tax_year", "tax_year"),
    )
    for index_name, column_name in indexed_columns:
        op.create_index(index_name, "payslip", [column_name], schema=SCHEMA)
def downgrade() -> None:
    """Drop the ``payslip`` indexes and table created by ``upgrade``.

    The schema itself is deliberately left in place. ``DROP SCHEMA`` without
    ``CASCADE`` refuses to remove a schema that still contains objects, and
    alembic/env.py configures Alembic's version table with
    ``version_table_schema=SCHEMA_NAME``.
    NOTE(review): SCHEMA_NAME is presumably this same schema -- confirm in
    payslip_ingest.db. If so, dropping the schema here would make every
    downgrade to base fail, and ``CASCADE`` would instead delete the version
    table Alembic still has to update. ``upgrade`` uses
    ``CREATE SCHEMA IF NOT EXISTS``, so re-applying after a downgrade works
    with the schema kept.
    """
    # Reverse order of creation: indexes first, then the table.
    op.drop_index("idx_payslip_tax_year", table_name="payslip", schema=SCHEMA)
    op.drop_index("idx_payslip_pay_date", table_name="payslip", schema=SCHEMA)
    op.drop_table("payslip", schema=SCHEMA)