Initial extraction from monorepo

2026-05-07 17:06:19 +00:00 · 2026-05-07 17:06:19 +00:00 · f7ef7ca4ab
commit f7ef7ca4ab
56 changed files with 6163 additions and 0 deletions
--- a/fire_planner/ingest/__init__.py
+++ b/fire_planner/ingest/__init__.py
@@ -0,0 +1 @@
+"""Ingest layers — Wealthfolio, payslip-ingest, hmrc-sync."""
--- a/fire_planner/ingest/hmrc.py
+++ b/fire_planner/ingest/hmrc.py
@@ -0,0 +1,25 @@
+"""HMRC sync read-only consumer (placeholder).
+
+`hmrc-sync` is in flight (per project memory id=1106) — prod credentials
+hadn't landed at the time of writing fire-planner. When they do, this
+module reads `hmrc_sync.income_record` (or whatever the final schema is)
+to corroborate payslip-derived income and tax against HMRC ground truth.
+
+For v1 this is a stub. The CLI's `ingest --source=hmrc` command exits
+0 with a `pending` log line.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class HmrcStatus:
+    """Immutable availability report for the HMRC sync data source."""
+
+    # Whether hmrc-sync data can be read yet.
+    available: bool
+    # Short human-readable explanation accompanying the flag.
+    note: str
+
+
+def status() -> HmrcStatus:
+    """Report HMRC sync availability.
+
+    The v1 stub never has data: the result is always unavailable and
+    carries a note pointing at the pending prod credentials.
+    """
+    pending_note = "hmrc-sync prod creds pending — see memory id=1106"
+    return HmrcStatus(note=pending_note, available=False)
--- a/fire_planner/ingest/payslip.py
+++ b/fire_planner/ingest/payslip.py
@@ -0,0 +1,77 @@
+"""Read the deployed payslip-ingest schema for income + RSU vest cadence.
+
+Read-only: we never write to `payslip_ingest.*`. The DB role
+`pg-fire-planner` only needs SELECT on payslip_ingest.payslip and
+payslip_ingest.rsu_vest_events.
+
+Outputs feed scenario calibration:
+- savings_per_year_gbp: median monthly net_pay × 12 less the £100k
+  baseline spend (the planner allocates the surplus to portfolio).
+- annual_rsu_gross_gbp: RSU vest value over the look-back window,
+  annualised (window total × 12 / months), used to validate the
+  savings rate against expected gross compensation.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import date
+from decimal import Decimal
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+@dataclass(frozen=True)
+class IncomeSummary:
+    """Immutable income aggregate produced by `read_income_summary`."""
+
+    # Typical monthly take-home: the median payslip net_pay in the window.
+    median_monthly_net_pay_gbp: Decimal
+    # Despite the name, `read_income_summary` fills this with the
+    # window's RSU total scaled to twelve months.
+    median_annual_rsu_gbp: Decimal
+    # Oldest / newest date across payslips and RSU vests; None when the
+    # window holds no rows at all.
+    earliest_date: date | None
+    latest_date: date | None
+    # Number of payslip rows and RSU vest rows that fed the aggregate.
+    payslip_count: int
+    rsu_count: int
+
+
+async def read_income_summary(session: AsyncSession, months: int = 24) -> IncomeSummary:
+    """Aggregate the most-recent `months` of payslips + RSU vests.
+
+    Args:
+        session: Async session used for two read-only SELECTs, against
+            `payslip_ingest.payslip` and `payslip_ingest.rsu_vest_events`.
+        months: Look-back window in months, counted back from the
+            database's CURRENT_DATE.
+
+    Returns:
+        An `IncomeSummary`. `median_annual_rsu_gbp` carries the window's
+        RSU total scaled to twelve months (total × 12 / months), not a
+        per-year median. With no rows in the window the monetary fields
+        are zero and both dates are None.
+    """
+    from statistics import median  # function-scope: only needed here
+
+    # The window is an explicitly typed integer multiplied by a one-month
+    # interval. Concatenating the bind parameter with ' months' makes the
+    # server infer a *text* parameter, which drivers using server-side
+    # binding (e.g. asyncpg) reject when handed a Python int.
+    payslip_rows = (await session.execute(
+        text(
+            """
+            SELECT pay_date, net_pay
+              FROM payslip_ingest.payslip
+             WHERE pay_date >= CURRENT_DATE - (CAST(:months AS INTEGER) * INTERVAL '1 month')
+             ORDER BY pay_date DESC
+            """),
+        {"months": months},
+    )).all()
+
+    rsu_rows = (await session.execute(
+        text(
+            """
+            SELECT vest_date, gross_value_gbp
+              FROM payslip_ingest.rsu_vest_events
+             WHERE vest_date >= CURRENT_DATE - (CAST(:months AS INTEGER) * INTERVAL '1 month')
+             ORDER BY vest_date DESC
+            """),
+        {"months": months},
+    )).all()
+
+    # NULL amounts count as zero. Going through str() keeps float values
+    # from leaking binary noise into the Decimal.
+    monthly_nets = [Decimal(str(r[1] or 0)) for r in payslip_rows]
+    # A true median: for an even number of payslips this averages the two
+    # middle values instead of picking the upper one.
+    median_monthly_net = median(monthly_nets) if monthly_nets else Decimal("0")
+
+    rsu_total_gbp = sum((Decimal(str(r[1] or 0)) for r in rsu_rows), start=Decimal("0"))
+    # Annualise against the requested window; max() guards the division
+    # when a caller passes months <= 0.
+    annual_rsu = rsu_total_gbp * 12 / max(1, months)
+
+    all_dates = [r[0] for r in payslip_rows] + [r[0] for r in rsu_rows]
+
+    return IncomeSummary(
+        median_monthly_net_pay_gbp=median_monthly_net,
+        median_annual_rsu_gbp=annual_rsu,
+        earliest_date=min(all_dates, default=None),
+        latest_date=max(all_dates, default=None),
+        payslip_count=len(payslip_rows),
+        rsu_count=len(rsu_rows),
+    )
--- a/fire_planner/ingest/wealthfolio.py
+++ b/fire_planner/ingest/wealthfolio.py
@@ -0,0 +1,126 @@
+"""Wealthfolio ingest — kubectl exec into the wealthfolio pod, read the
+SQLite DB read-only, parse account snapshots, upsert into
+`fire_planner.account_snapshot`.
+
+Wealthfolio stores every account's NW + holdings in
+`/data/app.db` (SQLite). The published schema (post-2025) keeps a
+`holdings_snapshot` table per (account_id, date). For the planner we
+fold to total NW per account per day.
+
+Phase 0 prerequisite: `wealthfolio-sync` must record a snapshot for
+every active account every day. Until that lands the Schwab and
+InvestEngine accounts read as stale snapshots from years ago and the
+planner anchors on £154k instead of the real ~£1M. See
+`fire-planner/README.md` and the parent CLAUDE.md project memory.
+
+This module does NOT shell out to kubectl — that's the operator's job.
+Instead, callers pass an already-fetched local SQLite file path
+(typically `/tmp/wealthfolio.db`). The CLI wraps the kubectl exec.
+"""
+from __future__ import annotations
+
+import sqlite3
+from datetime import date
+from decimal import Decimal
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from fire_planner.db import AccountSnapshot
+
+
+def _dialect_insert(session: AsyncSession) -> Any:
+    """Pick the dialect-specific `insert` construct for the session's bind.
+
+    A SQLite bind gets the SQLite insert; every other dialect falls back
+    to the PostgreSQL insert.
+    """
+    dialect_name = session.get_bind().dialect.name
+    return sqlite_insert if dialect_name == "sqlite" else pg_insert
+
+
+def read_account_snapshots(db_path: str | Path, as_of: date | None = None) -> list[dict[str, Any]]:
+    """Read per-account totals for a single snapshot date.
+
+    Args:
+        db_path: Local copy of the Wealthfolio SQLite file.
+        as_of: Snapshot date to read. When None, the most recent
+            `snapshot_date` found in `holdings_snapshot` is used.
+
+    Returns:
+        A list of dicts ready for upsert into `account_snapshot`.
+        Each dict has: external_id, snapshot_date, account_id,
+        account_name, account_type, currency, market_value,
+        market_value_gbp. Accounts with no holdings on the chosen date
+        are absent; the list is empty when the query itself fails
+        (e.g. the expected tables are missing).
+
+    Raises:
+        FileNotFoundError: If `db_path` does not exist.
+    """
+    db_path = Path(db_path)
+    if not db_path.exists():
+        raise FileNotFoundError(f"Wealthfolio sqlite db not found: {db_path}")
+    # as_uri() percent-encodes characters that are special inside a URI
+    # (`?`, `#`, `%`); pasting the raw path into the URI would truncate
+    # or corrupt the file name. as_uri() requires an absolute path.
+    conn = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True)
+    conn.row_factory = sqlite3.Row
+    try:
+        return _query_snapshots(conn, as_of)
+    finally:
+        conn.close()
+
+
+def _query_snapshots(conn: sqlite3.Connection, as_of: date | None) -> list[dict[str, Any]]:
+    """Sum holdings per account for one snapshot date.
+
+    Wealthfolio's actual schema is opaque (different versions ship
+    different tables). Only the v1 layout (`accounts` +
+    `holdings_snapshot`) is tried; if the query fails, a warning is
+    logged and an empty list is returned so the operator can run
+    wealthfolio-sync and try again.
+    """
+    import logging  # function-scope: the module has no logger of its own
+
+    cur = conn.cursor()
+    try:
+        if as_of is None:
+            cur.execute("SELECT MAX(snapshot_date) FROM holdings_snapshot")
+            latest = cur.fetchone()
+            # An empty table yields NULL; fall back to today so the main
+            # query simply matches nothing.
+            as_of_str = latest[0] if latest and latest[0] else date.today().isoformat()
+        else:
+            as_of_str = as_of.isoformat()
+
+        cur.execute(
+            """
+            SELECT a.id          AS account_id,
+                   a.name        AS account_name,
+                   a.type        AS account_type,
+                   a.currency    AS currency,
+                   SUM(h.market_value)        AS market_value,
+                   SUM(h.market_value_gbp)    AS market_value_gbp,
+                   ?             AS snapshot_date
+              FROM holdings_snapshot h
+              JOIN accounts a ON a.id = h.account_id
+             WHERE h.snapshot_date = ?
+             GROUP BY a.id
+            """,
+            (as_of_str, as_of_str),
+        )
+    except sqlite3.OperationalError as exc:
+        # Still best-effort (empty result), but no longer silent.
+        logging.getLogger(__name__).warning(
+            "Wealthfolio snapshot query failed (%s); returning no rows", exc)
+        return []
+
+    return [
+        {
+            "external_id": f"wealthfolio:{row['account_id']}:{row['snapshot_date']}",
+            "snapshot_date": date.fromisoformat(row["snapshot_date"]),
+            "account_id": str(row["account_id"]),
+            "account_name": row["account_name"] or "",
+            "account_type": row["account_type"] or "unknown",
+            "currency": row["currency"] or "GBP",
+            # str() first so float sums do not leak binary noise into Decimal.
+            "market_value": Decimal(str(row["market_value"] or 0)),
+            "market_value_gbp": Decimal(str(row["market_value_gbp"] or 0)),
+        }
+        for row in cur.fetchall()
+    ]
+
+
+async def upsert_snapshots(session: AsyncSession, rows: list[dict[str, Any]]) -> int:
+    """Insert snapshot rows, overwriting any row with the same `external_id`.
+
+    Args:
+        session: Async session bound to a SQLite or PostgreSQL engine.
+        rows: Dicts as produced by `read_account_snapshots`.
+
+    Returns:
+        The number of rows submitted (`len(rows)`), or 0 for an empty
+        list, in which case no statement is executed. No commit is
+        issued here.
+    """
+    if not rows:
+        return 0
+    insert_ = _dialect_insert(session)
+    stmt = insert_(AccountSnapshot).values(rows)
+    # On conflict, refresh everything that can change for an existing
+    # external_id. `currency` is included so the stored currency cannot
+    # drift away from the market values written alongside it.
+    update_cols = {
+        "market_value": stmt.excluded.market_value,
+        "market_value_gbp": stmt.excluded.market_value_gbp,
+        "snapshot_date": stmt.excluded.snapshot_date,
+        "account_name": stmt.excluded.account_name,
+        "account_type": stmt.excluded.account_type,
+        "currency": stmt.excluded.currency,
+    }
+    stmt = stmt.on_conflict_do_update(index_elements=["external_id"], set_=update_cols)
+    await session.execute(stmt)
+    return len(rows)
				`@ -0,0 +1 @@`
				`"""Ingest layers — Wealthfolio, payslip-ingest, hmrc-sync."""`