returns: 3 models — Shiller bootstrap (default), manual %, Wealthfolio history

Adds a "Returns model" picker on /what-if that switches how the simulator's `paths` (n_paths × n_years × 3) is built: 1. shiller (default) — current behaviour, block-bootstrap of the Shiller 1871+ historical series (or its synthetic-calibrated fallback when the CSV isn't mounted). 2. manual — every year of every path = the user's "real return %" input. Deterministic, no fan, useful for sanity checks. New helper `constant_real_return_paths` constructs the (n_paths, n_years, 3) tensor with stock=bond=real, cpi=0 so the simulator's `(1+nominal)/(1+cpi)-1` short-circuits to exactly the input. 3. wealthfolio — pulls daily_account_valuation from the wealthfolio_sync PG mirror, sums total_value + net_contribution across accounts per day (FX-adjusted), strips contribution deltas to isolate market return, compounds daily returns into per-calendar-year samples, block-bootstraps with block_size=1 (only ~6 distinct samples available, no serial-correlation signal to preserve). Glide path is a no-op in this mode — the user's actual blended portfolio is treated as a single asset. API: SimulateRequest gains `returns_mode` ("shiller"|"manual"| "wealthfolio") + `manual_real_return_pct`. simulate.py's `_build_paths` dispatches; wealthfolio mode opens a transient session against the mirror DB. UI: new Field on the form (next to Strategy / Glide path) with a contextual hint that explains each option's tradeoff. The "About the model" panel at the bottom now has a "Returns model" section mirroring the same content. The Manual % input only shows when returns_mode='manual'. 10 new tests on the Wealthfolio helper (contribution-stripping, multi-account aggregation, FX, partial-year drop, TOTAL filter, empty-input, plus 3 deterministic-paths tests). 198 backend tests + 7 frontend tests. mypy strict + ruff + tsc strict all pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 01:04:25 +00:00 · 2026-05-10 01:04:25 +00:00 · 00ec874889
commit 00ec874889
parent f2c36bc4a3
6 changed files with 515 additions and 11 deletions
--- a/fire_planner/api/schemas.py
+++ b/fire_planner/api/schemas.py
@ -226,6 +226,18 @@ class SimulateRequest(BaseModel):
    n_paths: int = Field(ge=100, le=50_000, default=5_000)
    seed: int = 42
    life_events: list[LifeEventInput] = Field(default_factory=list)
+    # Returns model — controls how `paths` (n_paths × n_years × 3) is built:
+    #   "shiller"     — block-bootstrap of Shiller 1871+ historical returns
+    #                   (or the synthetic Shiller-calibrated stream when the
+    #                   CSV isn't mounted). The default; broadest regime
+    #                   coverage including 1929/1973/2000/2008.
+    #   "manual"      — every year of every path = `manual_real_return_pct`.
+    #                   Deterministic, no fan, useful for sanity checks.
+    #   "wealthfolio" — block-bootstrap of the user's actual blended real
+    #                   returns derived from wealthfolio_sync. Reflects the
+    #                   recent regime only (~6 years). Glide path is moot.
+    returns_mode: str = Field(default="shiller", pattern="^(shiller|manual|wealthfolio)$")
+    manual_real_return_pct: Decimal | None = None


 class SimulateResult(BaseModel):
--- a/fire_planner/api/simulate.py
+++ b/fire_planner/api/simulate.py
@ -16,6 +16,7 @@ from pathlib import Path

 import numpy as np
 from fastapi import APIRouter, HTTPException
+from sqlalchemy.ext.asyncio import async_sessionmaker

 from fire_planner.api.schemas import (
    CompareRequest,
@ -25,9 +26,14 @@ from fire_planner.api.schemas import (
    SimulateResult,
 )
 from fire_planner.glide_path import get as get_glide
+from fire_planner.ingest.wealthfolio_pg import create_wf_sync_engine_from_env
 from fire_planner.life_events import EventInput, events_to_cashflow_array
 from fire_planner.returns.bootstrap import block_bootstrap
 from fire_planner.returns.shiller import load_from_csv, synthetic_returns
+from fire_planner.returns.wealthfolio_returns import (
+    compute_annual_returns_from_pg,
+    constant_real_return_paths,
+)
 from fire_planner.scenarios import build_regime_schedule, build_strategy
 from fire_planner.simulator import SimulationResult, simulate

@ -36,14 +42,51 @@ router = APIRouter(tags=["simulate"])
 _RETURNS_CSV = Path("/data/shiller_returns.csv")


-def _load_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
+def _shiller_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
+    """Build return paths by block-bootstrapping the Shiller returns bundle.
+
+    The bundle is read from `_RETURNS_CSV` when that file exists; otherwise
+    the synthetic stream is used. The synthetic fallback is always generated
+    with a fixed seed of 42, so `seed` only influences the resampling, not
+    the underlying series. Resampling uses 5-year blocks.
+
+    Returns whatever `block_bootstrap` yields for the requested `n_paths`
+    and `n_years` — presumably the (n_paths, n_years, 3) tensor the
+    simulator consumes; confirm against `block_bootstrap`.
+    """
    bundle = (load_from_csv(_RETURNS_CSV) if _RETURNS_CSV.exists() else synthetic_returns(seed=42))
    rng = np.random.default_rng(seed)
    return block_bootstrap(bundle, n_paths=n_paths, n_years=n_years, block_size=5, rng=rng)


-def _project(req: SimulateRequest) -> tuple[SimulationResult, float]:
-    paths = _load_paths(req.seed, req.n_paths, req.horizon_years)
+async def _wealthfolio_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
+    """Block-bootstrap the user's actual blended portfolio returns.
+
+    Opens a transient engine and session against the wealthfolio_sync
+    mirror, reads the per-year returns bundle, and always disposes the
+    engine — even when the query or the aggregation raises.
+
+    With typically <10 distinct annual samples, block_size=1 is
+    appropriate — there's no serial-correlation signal to preserve.
+
+    The resampling is CPU-bound NumPy work, so it is pushed to a worker
+    thread instead of running on the event loop; the DB read stays on the
+    loop because it is genuinely async I/O.
+
+    Raises:
+        ValueError: propagated from `compute_annual_returns_from_pg` when
+            the mirror holds no usable history.
+    """
+    eng = create_wf_sync_engine_from_env()
+    try:
+        factory = async_sessionmaker(eng, expire_on_commit=False)
+        async with factory() as wf_sess:
+            bundle = await compute_annual_returns_from_pg(wf_sess)
+    finally:
+        await eng.dispose()
+    rng = np.random.default_rng(seed)
+    # The generator is created here and handed to exactly one worker, so
+    # there is no concurrent use of `rng`.
+    return await asyncio.to_thread(
+        block_bootstrap,
+        bundle,
+        n_paths=n_paths,
+        n_years=n_years,
+        block_size=1,
+        rng=rng,
+    )
+
+
+async def _build_paths(req: SimulateRequest) -> np.ndarray:
+    """Build the returns tensor for `req` according to `req.returns_mode`.
+
+    * "manual"      — constant paths from `req.manual_real_return_pct`.
+    * "wealthfolio" — bootstrap of the user's own history from the mirror DB.
+    * anything else — the Shiller block-bootstrap (the schema default).
+
+    Raises:
+        HTTPException(400): manual mode without `manual_real_return_pct`,
+            or wealthfolio mode when the history helper raises ValueError.
+    """
+    if req.returns_mode == "manual":
+        if req.manual_real_return_pct is None:
+            raise HTTPException(
+                status_code=400,
+                detail="manual_real_return_pct is required when returns_mode='manual'",
+            )
+        # NOTE(review): the value is forwarded unscaled — confirm whether the
+        # client sends a fraction (0.05) or a percentage (5).
+        return constant_real_return_paths(
+            n_paths=req.n_paths,
+            n_years=req.horizon_years,
+            real_return_pct=float(req.manual_real_return_pct),
+        )
+    if req.returns_mode == "wealthfolio":
+        try:
+            return await _wealthfolio_paths(req.seed, req.n_paths, req.horizon_years)
+        except ValueError as e:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Wealthfolio history insufficient: {e}",
+            ) from e
+    # `_shiller_paths` is synchronous (file read + bootstrap). Run it in a
+    # worker thread so a large `n_paths` request does not stall the event
+    # loop for every other in-flight request.
+    return await asyncio.to_thread(_shiller_paths, req.seed, req.n_paths, req.horizon_years)
+
+
+def _project(req: SimulateRequest, paths: np.ndarray) -> tuple[SimulationResult, float]:
    annual_savings = (np.full(req.horizon_years, float(req.savings_per_year_gbp), dtype=np.float64)
                      if req.savings_per_year_gbp > 0 else None)
    floor = float(req.floor_gbp) if req.floor_gbp is not None else None
@ -120,8 +163,9 @@ def _to_response(result: SimulationResult, elapsed: float) -> SimulateResult:
@router.post("/simulate", response_model=SimulateResult)
 async def simulate_one(req: SimulateRequest) -> SimulateResult:
    """Run one scenario synchronously, no DB write. ~1-3s for 5k paths."""
+    paths = await _build_paths(req)
    try:
-        result, elapsed = await asyncio.to_thread(_project, req)
+        result, elapsed = await asyncio.to_thread(_project, req, paths)
    except KeyError as e:
        raise HTTPException(status_code=400, detail=f"Unknown name: {e}") from None
    return _to_response(result, elapsed)
@ -131,7 +175,8 @@ async def simulate_one(req: SimulateRequest) -> SimulateResult:
 async def compare_scenarios(req: CompareRequest) -> CompareResult:
    """Run 2-5 scenarios in parallel, return all results."""
    async def one(s: SimulateRequest) -> SimulateResult:
-        result, elapsed = await asyncio.to_thread(_project, s)
+        paths = await _build_paths(s)
+        result, elapsed = await asyncio.to_thread(_project, s, paths)
        return _to_response(result, elapsed)

    try:
--- a/fire_planner/returns/wealthfolio_returns.py
+++ b/fire_planner/returns/wealthfolio_returns.py
@ -0,0 +1,190 @@
+"""Build a `ReturnsBundle` from the user's actual portfolio history.
+
+Reads the `wealthfolio_sync.daily_account_valuation` PG mirror, sums
+`total_value` and `net_contribution` across accounts per day, and
+computes the user's blended nominal portfolio return year-by-year:
+
+    daily_return_t = (total_t - total_{t-1} - delta_net_contribution_t) / total_{t-1}
+    annual_return_y = product(1 + daily_return) - 1   over calendar year y
+
+The contribution-stripping is essential: a £10k deposit isn't a 5%
+return on a £200k portfolio; without it the bundle would conflate
+investment returns with savings.
+
+Real returns are approximated by feeding nominal returns plus a
+constant CPI assumption (default 3%/yr) into the simulator, which
+already deflates with `(1 + nominal) / (1 + cpi) - 1`. For typical UK
+inflation over the 2020-present window, 3% is a reasonable default;
+callers can override.
+
+Output `ReturnsBundle` sets `stock_nominal == bond_nominal == blended`
+because we don't have asset-class breakdown — Wealthfolio mode treats
+the user's actual portfolio mix as a single asset, so the simulator's
+glide-path mixing becomes a no-op (identical to picking 100/0 or 60/40
+or 0/100; all produce the same per-year return).
+
+With ~6 years of data, block_bootstrap should be called with
+block_size=1 — there's not enough sample diversity to preserve
+multi-year serial correlation.
+"""
+from __future__ import annotations
+
+from collections import defaultdict
+from datetime import date
+from decimal import Decimal
+
+import numpy as np
+import numpy.typing as npt
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from fire_planner.returns.shiller import ReturnsBundle
+
+
+async def compute_annual_returns_from_pg(
+    wf_session: AsyncSession,
+    cpi_assumption_pct: float = 0.03,
+    drop_partial_years: bool = True,
+) -> ReturnsBundle:
+    """Aggregate wealthfolio_sync to a per-year `ReturnsBundle`.
+
+    Steps:
+    1. Pull (valuation_date, total_value, net_contribution) per row,
+       excluding the synthetic 'TOTAL' account, and convert to base
+       currency via fx_rate_to_base (a NULL rate is treated as 1.0).
+    2. Aggregate per date across all accounts.
+    3. Per calendar year, compound contribution-stripped daily nominal
+       returns into one growth factor. Days whose previous total is zero
+       or negative are skipped (early account-onboarding edge cases), as
+       is the very first day, which has no prior baseline.
+    4. When `drop_partial_years=True`, drop any year whose first
+       observation falls after January or whose last observation falls
+       before December, to avoid presenting a 5-month sample as an
+       annual return. The check is month-granular only.
+
+    Args:
+        wf_session: async session bound to the wealthfolio_sync mirror.
+        cpi_assumption_pct: constant annual inflation written into the
+            bundle's `cpi` array. Despite the `_pct` suffix this is a
+            fraction (0.03 means 3%/yr).
+        drop_partial_years: see step 4.
+
+    Returns:
+        A ReturnsBundle with stock = bond = annual nominal returns and
+        cpi = `cpi_assumption_pct` for every entry.
+
+    Raises:
+        ValueError: if the table returns no rows, or if no annual sample
+            survives filtering. A single surviving sample is accepted,
+            although bootstrapping from one value makes every path
+            identical.
+    """
+    rows = (await wf_session.execute(
+        text("""
+            SELECT valuation_date,
+                   total_value,
+                   net_contribution,
+                   COALESCE(fx_rate_to_base, 1.0) AS fx,
+                   account_id
+              FROM daily_account_valuation
+             WHERE account_id != 'TOTAL'
+             ORDER BY valuation_date, account_id
+        """))).all()
+
+    if not rows:
+        raise ValueError("daily_account_valuation is empty")
+
+    # Per-date sums in base currency: by_date[d] == [total_value, net_contribution].
+    by_date: dict[date, list[float]] = defaultdict(lambda: [0.0, 0.0])
+    for valuation_date, total_value, net_contrib, fx, _ in rows:
+        if total_value is None:
+            continue
+        # SQLite returns dates as strings; PG returns datetime.date.
+        d_obj = (valuation_date if isinstance(valuation_date, date) else
+                 date.fromisoformat(str(valuation_date)))
+        fx_f = float(fx)
+        total_f = float(_dec(total_value)) * fx_f
+        nc_f = float(_dec(net_contrib)) * fx_f if net_contrib is not None else 0.0
+        day_sums = by_date[d_obj]
+        day_sums[0] += total_f
+        day_sums[1] += nc_f
+
+    # Walk the dates chronologically, compounding each day's return into
+    # the growth factor of the calendar year that day belongs to. The
+    # return across a year boundary is therefore credited to the new year.
+    growth: dict[int, float] = {}
+    year_first_date: dict[int, date] = {}
+    year_last_date: dict[int, date] = {}
+
+    prev_total: float | None = None
+    prev_nc = 0.0
+    for d in sorted(by_date):
+        cur_total, cur_nc = by_date[d]
+        y = d.year
+        if y not in growth:
+            growth[y] = 1.0
+            year_first_date[y] = d
+        # Dates arrive in ascending order, so the last write wins.
+        year_last_date[y] = d
+
+        if prev_total is not None and prev_total > 0:
+            # Strip the day's net deposits/withdrawals so that only market
+            # movement counts as return.
+            delta_nc = cur_nc - prev_nc
+            day_return = (cur_total - prev_total - delta_nc) / prev_total
+            growth[y] *= 1 + day_return
+
+        prev_total = cur_total
+        prev_nc = cur_nc
+
+    # Convert growth factors to returns, dropping partial years.
+    samples: list[tuple[int, float]] = []
+    for y, cum in sorted(growth.items()):
+        # Require the year span to cover Jan and Dec — coarse but cheap
+        if drop_partial_years and (year_first_date[y].month > 1
+                                   or year_last_date[y].month < 12):
+            continue
+        samples.append((y, cum - 1.0))
+
+    if not samples:
+        raise ValueError(
+            "No complete years of wealthfolio data found. "
+            "Try drop_partial_years=False or wait for more data."
+        )
+
+    years = np.array([y for y, _ in samples], dtype=np.int32)
+    rets = np.array([r for _, r in samples], dtype=np.float64)
+    cpi = np.full_like(rets, cpi_assumption_pct, dtype=np.float64)
+
+    return ReturnsBundle(
+        years=years,
+        stock_nominal=rets,
+        bond_nominal=rets.copy(),
+        cpi=cpi,
+    )
+
+
+def _dec(v: object) -> Decimal:
+    """Coerce `v` to a `Decimal`.
+
+    A `Decimal` is returned unchanged; anything else (str, float, int, ...)
+    is converted via its `str()` form, so a float such as 0.1 becomes
+    Decimal("0.1") rather than its full binary expansion.
+    """
+    return v if isinstance(v, Decimal) else Decimal(str(v))
+
+
+def constant_real_return_paths(
+    n_paths: int,
+    n_years: int,
+    real_return_pct: float,
+) -> npt.NDArray[np.float64]:
+    """Manual mode: a deterministic tensor where every year returns the same.
+
+    The result has shape (n_paths, n_years, 3); the last axis is
+    (stock_nominal, bond_nominal, cpi). Stock and bond are both set to
+    `real_return_pct` and cpi to 0, so the simulator's
+    `(1+nominal)/(1+cpi)-1` deflation yields exactly `real_return_pct`.
+    No randomness, no fan — every path is identical.
+
+    NOTE(review): `real_return_pct` is written into the array unscaled;
+    despite the `_pct` suffix no division by 100 happens here — confirm
+    callers pass the unit the simulator expects.
+    """
+    paths = np.empty((n_paths, n_years, 3), dtype=np.float64)
+    # Stock and bond channels carry the same constant return...
+    paths[:, :, :2] = real_return_pct
+    # ...and inflation is zero because the input is already real.
+    paths[:, :, 2] = 0.0
+    return paths