Initial extraction from monorepo
This commit is contained in:
commit
f7ef7ca4ab
56 changed files with 6163 additions and 0 deletions
1
fire_planner/ingest/__init__.py
Normal file
1
fire_planner/ingest/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Ingest layers — Wealthfolio, payslip-ingest, hmrc-sync."""
|
||||
25
fire_planner/ingest/hmrc.py
Normal file
25
fire_planner/ingest/hmrc.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
"""HMRC sync read-only consumer (placeholder).
|
||||
|
||||
`hmrc-sync` is in flight (per project memory id=1106) — prod credentials
|
||||
hadn't landed at the time of writing fire-planner. When they do, this
|
||||
module reads `hmrc_sync.income_record` (or whatever the final schema is)
|
||||
to corroborate payslip-derived income and tax against HMRC ground truth.
|
||||
|
||||
For v1 this is a stub. The CLI's `ingest --source=hmrc` command exits
|
||||
0 with a `pending` log line.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HmrcStatus:
    """Immutable availability report for the HMRC sync data source."""

    # Whether HMRC sync data can currently be read.
    available: bool
    # Free-text explanation that accompanies the flag.
    note: str
|
||||
|
||||
|
||||
def status() -> HmrcStatus:
    """Report HMRC sync availability.

    The v1 implementation is a stub: it unconditionally returns an
    unavailable status whose note marks the integration as pending.
    """
    pending_note = "hmrc-sync prod creds pending — see memory id=1106"
    return HmrcStatus(available=False, note=pending_note)
|
||||
77
fire_planner/ingest/payslip.py
Normal file
77
fire_planner/ingest/payslip.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"""Read the deployed payslip-ingest schema for income + RSU vest cadence.
|
||||
|
||||
Read-only: we never write to `payslip_ingest.*`. The DB role
|
||||
`pg-fire-planner` only needs SELECT on payslip_ingest.payslip and
|
||||
payslip_ingest.rsu_vest_events.
|
||||
|
||||
Outputs feed scenario calibration:
|
||||
- savings_per_year_gbp: median monthly net_pay × 12 less the £100k
|
||||
baseline spend (the planner allocates the surplus to portfolio).
|
||||
- annual_rsu_gross_gbp: median annual RSU vest value, used to validate
|
||||
the savings rate against expected gross compensation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IncomeSummary:
    """Immutable roll-up of payslip and RSU-vest rows over a look-back window."""

    # Representative monthly net pay, in GBP.
    median_monthly_net_pay_gbp: Decimal
    # Annual RSU vest value, in GBP.
    median_annual_rsu_gbp: Decimal
    # Oldest and newest dates covered by the summary; None when there is
    # no date to report.
    earliest_date: date | None
    latest_date: date | None
    # How many payslip rows and RSU vest rows were summarised.
    payslip_count: int
    rsu_count: int
|
||||
|
||||
|
||||
async def read_income_summary(session: AsyncSession, months: int = 24) -> IncomeSummary:
    """Aggregate the most-recent `months` of payslips + RSU vests.

    Args:
        session: Async SQLAlchemy session used to run two read-only
            SELECTs against `payslip_ingest.payslip` and
            `payslip_ingest.rsu_vest_events`.
        months: Look-back window in months, counted back from the
            database's `CURRENT_DATE`.

    Returns:
        An `IncomeSummary` where:
        - `median_monthly_net_pay_gbp` is the median of `net_pay` over the
          fetched payslips (mean of the two middle values when the count
          is even), or `Decimal("0")` when there are none. A NULL
          `net_pay` is counted as 0.
        - `median_annual_rsu_gbp` is, despite its name, the window's total
          `gross_value_gbp` scaled to twelve months
          (`total * 12 / max(1, months)`).
        - `earliest_date` / `latest_date` span both row sets, or are
          `None` when both are empty.
        - `payslip_count` / `rsu_count` are the fetched row counts.
    """
    # Function-scope import: keeps this block self-contained without
    # touching the module's import header.
    from statistics import median

    params = {"months": months}

    # NOTE(review): `:months || ' months'` binds an int into a text
    # concatenation; some async drivers type-check binds strictly —
    # confirm this works against the deployed driver.
    payslip_query = text(
        """
        SELECT pay_date, net_pay
        FROM payslip_ingest.payslip
        WHERE pay_date >= CURRENT_DATE - (:months || ' months')::interval
        ORDER BY pay_date DESC
        """
    )
    rsu_query = text(
        """
        SELECT vest_date, gross_value_gbp
        FROM payslip_ingest.rsu_vest_events
        WHERE vest_date >= CURRENT_DATE - (:months || ' months')::interval
        ORDER BY vest_date DESC
        """
    )

    payslip_rows = (await session.execute(payslip_query, params)).all()
    rsu_rows = (await session.execute(rsu_query, params)).all()

    # Go through str() so float/NUMERIC driver values become exact Decimals.
    net_pays = [Decimal(str(row[1] or 0)) for row in payslip_rows]
    # statistics.median averages the two middle values for an even count;
    # indexing sorted[len // 2] would report the upper-middle value instead.
    median_monthly_net = median(net_pays) if net_pays else Decimal("0")

    rsu_total_gbp = sum(
        (Decimal(str(row[1] or 0)) for row in rsu_rows),
        start=Decimal("0"),
    )
    # Guard the divisor so months <= 0 cannot divide by zero.
    annual_rsu = rsu_total_gbp * 12 / max(1, months)

    all_dates = [row[0] for row in payslip_rows] + [row[0] for row in rsu_rows]

    return IncomeSummary(
        median_monthly_net_pay_gbp=median_monthly_net,
        median_annual_rsu_gbp=annual_rsu,
        earliest_date=min(all_dates) if all_dates else None,
        latest_date=max(all_dates) if all_dates else None,
        payslip_count=len(payslip_rows),
        rsu_count=len(rsu_rows),
    )
|
||||
126
fire_planner/ingest/wealthfolio.py
Normal file
126
fire_planner/ingest/wealthfolio.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Wealthfolio ingest — open a local copy of the wealthfolio pod's
|
||||
SQLite DB read-only, parse account snapshots, upsert into
|
||||
`fire_planner.account_snapshot`.
|
||||
|
||||
Wealthfolio stores every account's NW + holdings in
|
||||
`/data/app.db` (SQLite). The published schema (post-2025) keeps a
|
||||
`holdings_snapshot` table per (account_id, date). For the planner we
|
||||
fold to total NW per account per day.
|
||||
|
||||
Phase 0 prerequisite: `wealthfolio-sync` must record a snapshot for
|
||||
every active account every day. Until that lands the Schwab and
|
||||
InvestEngine accounts read as stale snapshots from years ago and the
|
||||
planner anchors on £154k instead of the real ~£1M. See
|
||||
`fire-planner/README.md` and the parent CLAUDE.md project memory.
|
||||
|
||||
This module does NOT shell out to kubectl — that's the operator's job.
|
||||
Instead, callers pass an already-fetched local SQLite file path
|
||||
(typically `/tmp/wealthfolio.db`). The CLI wraps the kubectl exec.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from fire_planner.db import AccountSnapshot
|
||||
|
||||
|
||||
def _dialect_insert(session: AsyncSession) -> Any:
    """Pick the dialect-specific `insert` construct for the session's bind.

    Returns the SQLite `insert` when the bound dialect is named
    "sqlite"; every other dialect gets the PostgreSQL `insert`.
    """
    dialect_name = session.get_bind().dialect.name
    return sqlite_insert if dialect_name == "sqlite" else pg_insert
|
||||
|
||||
|
||||
def read_account_snapshots(db_path: str | Path, as_of: date | None = None) -> list[dict[str, Any]]:
    """Read one aggregated snapshot row per account from a Wealthfolio SQLite file.

    The database is opened read-only and closed before returning. The
    rows all belong to a single snapshot date, chosen by
    `_query_snapshots` from `as_of`.

    Args:
        db_path: Path to an already-fetched local copy of the SQLite DB.
        as_of: Snapshot date to read; passed through to `_query_snapshots`.

    Returns:
        A list of dicts ready for upsert into `account_snapshot`.
        Each dict has: external_id, snapshot_date, account_id,
        account_name, account_type, currency, market_value,
        market_value_gbp.

    Raises:
        FileNotFoundError: if `db_path` does not exist.
    """
    db_path = Path(db_path)
    if not db_path.exists():
        raise FileNotFoundError(f"Wealthfolio sqlite db not found: {db_path}")
    # Build the URI with as_uri() so the path is absolute and
    # percent-encoded. Interpolating the raw path would let '?', '#' or
    # '%' inside it be parsed as URI syntax and open the wrong file.
    db_uri = f"{db_path.resolve().as_uri()}?mode=ro"
    conn = sqlite3.connect(db_uri, uri=True)
    # Name-based row access is what _query_snapshots relies on.
    conn.row_factory = sqlite3.Row
    try:
        return _query_snapshots(conn, as_of)
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _query_snapshots(conn: sqlite3.Connection, as_of: date | None) -> list[dict[str, Any]]:
    """Fold `holdings_snapshot` to one total per account for a single date.

    Wealthfolio's actual schema is opaque (different versions ship
    different tables). Only the v1 layout (`accounts` +
    `holdings_snapshot`) is attempted. If a query raises
    `sqlite3.OperationalError`, a warning is logged and an empty list is
    returned so the caller can tell the operator to populate snapshots
    and retry.

    Args:
        conn: Open SQLite connection. Rows are read by column name, so
            its `row_factory` must be `sqlite3.Row`.
        as_of: Snapshot date to read. When `None`, the greatest
            `snapshot_date` in `holdings_snapshot` is used, falling back
            to today's date if the table is empty.

    Returns:
        One dict per account that has holdings on the chosen date, with
        keys external_id, snapshot_date, account_id, account_name,
        account_type, currency, market_value, market_value_gbp.
    """
    # Function-scope import: keeps this block self-contained without
    # touching the module's import header.
    import logging

    cur = conn.cursor()
    try:
        try:
            if as_of is None:
                cur.execute("SELECT MAX(snapshot_date) FROM holdings_snapshot")
                latest = cur.fetchone()
                as_of_str = latest[0] if latest and latest[0] else date.today().isoformat()
            else:
                as_of_str = as_of.isoformat()

            # The date is bound twice: once echoed back as a column so
            # every row carries it, once as the filter.
            cur.execute(
                """
                SELECT a.id AS account_id,
                       a.name AS account_name,
                       a.type AS account_type,
                       a.currency AS currency,
                       SUM(h.market_value) AS market_value,
                       SUM(h.market_value_gbp) AS market_value_gbp,
                       ? AS snapshot_date
                FROM holdings_snapshot h
                JOIN accounts a ON a.id = h.account_id
                WHERE h.snapshot_date = ?
                GROUP BY a.id
                """,
                (as_of_str, as_of_str),
            )
        except sqlite3.OperationalError as exc:
            # Still best-effort (empty result), but no longer silent: the
            # operator needs to see why nothing was ingested.
            logging.getLogger(__name__).warning(
                "wealthfolio snapshot query failed, returning no rows: %s", exc
            )
            return []
        fetched = cur.fetchall()
    finally:
        cur.close()

    return [
        {
            "external_id": f"wealthfolio:{row['account_id']}:{row['snapshot_date']}",
            "snapshot_date": date.fromisoformat(row["snapshot_date"]),
            "account_id": str(row["account_id"]),
            "account_name": row["account_name"] or "",
            "account_type": row["account_type"] or "unknown",
            "currency": row["currency"] or "GBP",
            # str() first so SQLite floats convert to exact Decimals.
            "market_value": Decimal(str(row["market_value"] or 0)),
            "market_value_gbp": Decimal(str(row["market_value_gbp"] or 0)),
        }
        for row in fetched
    ]
|
||||
|
||||
|
||||
async def upsert_snapshots(session: AsyncSession, rows: list[dict[str, Any]]) -> int:
    """Insert snapshot rows, updating in place on an `external_id` conflict.

    Args:
        session: Async SQLAlchemy session the statement runs on. No
            commit is issued here.
        rows: Dicts keyed by `AccountSnapshot` column names, e.g. the
            output of `read_account_snapshots`.

    Returns:
        The number of rows submitted (`len(rows)`); 0 when `rows` is
        empty, in which case no statement is executed.
    """
    if not rows:
        return 0

    insert_ = _dialect_insert(session)
    stmt = insert_(AccountSnapshot).values(rows)
    # Columns refreshed from the incoming row on conflict. `currency` is
    # included so a refreshed market_value never sits beside a stale
    # currency code.
    update_cols = {
        "market_value": stmt.excluded.market_value,
        "market_value_gbp": stmt.excluded.market_value_gbp,
        "currency": stmt.excluded.currency,
        "snapshot_date": stmt.excluded.snapshot_date,
        "account_name": stmt.excluded.account_name,
        "account_type": stmt.excluded.account_type,
    }
    stmt = stmt.on_conflict_do_update(index_elements=["external_id"], set_=update_cols)
    await session.execute(stmt)
    return len(rows)
|
||||
Loading…
Add table
Add a link
Reference in a new issue