returns: 3 models — Shiller bootstrap (default), manual %, Wealthfolio history
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

Adds a "Returns model" picker on /what-if that switches how the
simulator's `paths` (n_paths × n_years × 3) is built:

1. shiller (default) — current behaviour, block-bootstrap of the
   Shiller 1871+ historical series (or its synthetic-calibrated
   fallback when the CSV isn't mounted).

2. manual — every year of every path = the user's "real return %"
   input. Deterministic, no fan, useful for sanity checks. New
   helper `constant_real_return_paths` constructs the (n_paths,
   n_years, 3) tensor with stock=bond=real, cpi=0 so the simulator's
   `(1+nominal)/(1+cpi)-1` short-circuits to exactly the input.

3. wealthfolio — pulls daily_account_valuation from the wealthfolio_sync
   PG mirror, sums total_value + net_contribution across accounts per
   day (FX-adjusted), strips contribution deltas to isolate market
   return, compounds daily returns into per-calendar-year samples,
   block-bootstraps with block_size=1 (only ~6 distinct samples
   available, no serial-correlation signal to preserve). Glide path
   is a no-op in this mode — the user's actual blended portfolio is
   treated as a single asset.

API: SimulateRequest gains `returns_mode` ("shiller"|"manual"|
"wealthfolio") + `manual_real_return_pct`. simulate.py's `_build_paths`
dispatches; wealthfolio mode opens a transient session against the
mirror DB.

UI: new Field on the form (next to Strategy / Glide path) with a
contextual hint that explains each option's tradeoff. The "About the
model" panel at the bottom now has a "Returns model" section
mirroring the same content. The Manual % input only shows when
returns_mode='manual'.

10 new tests on the Wealthfolio helper (contribution-stripping,
multi-account aggregation, FX, partial-year drop, TOTAL filter,
empty-input, plus 3 deterministic-paths tests). 198 backend tests +
7 frontend tests. mypy strict + ruff + tsc strict all pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-10 01:04:25 +00:00
parent f2c36bc4a3
commit 00ec874889
6 changed files with 515 additions and 11 deletions

View file

@ -226,6 +226,18 @@ class SimulateRequest(BaseModel):
n_paths: int = Field(ge=100, le=50_000, default=5_000)
seed: int = 42
life_events: list[LifeEventInput] = Field(default_factory=list)
# Returns model — controls how `paths` (n_paths × n_years × 3) is built:
# "shiller" — block-bootstrap of Shiller 1871+ historical returns
# (or the synthetic Shiller-calibrated stream when the
# CSV isn't mounted). The default; broadest regime
# coverage including 1929/1973/2000/2008.
# "manual" — every year of every path = `manual_real_return_pct`.
# Deterministic, no fan, useful for sanity checks.
# "wealthfolio" — block-bootstrap of the user's actual blended real
# returns derived from wealthfolio_sync. Reflects the
# recent regime only (~6 years). Glide path is moot.
returns_mode: str = Field(default="shiller", pattern="^(shiller|manual|wealthfolio)$")
manual_real_return_pct: Decimal | None = None
class SimulateResult(BaseModel):

View file

@ -16,6 +16,7 @@ from pathlib import Path
import numpy as np
from fastapi import APIRouter, HTTPException
from sqlalchemy.ext.asyncio import async_sessionmaker
from fire_planner.api.schemas import (
CompareRequest,
@ -25,9 +26,14 @@ from fire_planner.api.schemas import (
SimulateResult,
)
from fire_planner.glide_path import get as get_glide
from fire_planner.ingest.wealthfolio_pg import create_wf_sync_engine_from_env
from fire_planner.life_events import EventInput, events_to_cashflow_array
from fire_planner.returns.bootstrap import block_bootstrap
from fire_planner.returns.shiller import load_from_csv, synthetic_returns
from fire_planner.returns.wealthfolio_returns import (
compute_annual_returns_from_pg,
constant_real_return_paths,
)
from fire_planner.scenarios import build_regime_schedule, build_strategy
from fire_planner.simulator import SimulationResult, simulate
@ -36,14 +42,51 @@ router = APIRouter(tags=["simulate"])
_RETURNS_CSV = Path("/data/shiller_returns.csv")
def _load_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
def _shiller_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
    """Block-bootstrap simulation paths from the Shiller returns bundle.

    The bundle is read from `_RETURNS_CSV` when that file exists; otherwise
    the synthetic stream is used, always generated with a fixed seed of 42.
    `seed` only drives the bootstrap's random generator. Blocks are 5 years
    long.
    """
    if _RETURNS_CSV.exists():
        source = load_from_csv(_RETURNS_CSV)
    else:
        # Fallback when the CSV isn't mounted; fixed seed keeps it reproducible.
        source = synthetic_returns(seed=42)
    return block_bootstrap(
        source,
        n_paths=n_paths,
        n_years=n_years,
        block_size=5,
        rng=np.random.default_rng(seed),
    )
def _project(req: SimulateRequest) -> tuple[SimulationResult, float]:
paths = _load_paths(req.seed, req.n_paths, req.horizon_years)
async def _wealthfolio_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
    """Block-bootstrap the user's actual blended returns from Wealthfolio.

    Opens a transient engine and session against the wealthfolio_sync
    mirror, derives the annual returns bundle, and always disposes the
    engine afterwards (even when the query or aggregation raises).

    With typically <10 distinct annual samples, block_size=1 is
    appropriate — there's no serial-correlation signal to preserve.

    The bootstrap is handed to a worker thread so that sampling a large
    `n_paths` does not stall the event loop for other requests.

    Exceptions from `compute_annual_returns_from_pg` propagate unchanged;
    the caller maps `ValueError` to an HTTP 400.
    """
    engine = create_wf_sync_engine_from_env()
    try:
        session_factory = async_sessionmaker(engine, expire_on_commit=False)
        async with session_factory() as wf_sess:
            bundle = await compute_annual_returns_from_pg(wf_sess)
    finally:
        # The engine is per-request; release its pool regardless of outcome.
        await engine.dispose()
    rng = np.random.default_rng(seed)
    return await asyncio.to_thread(
        block_bootstrap,
        bundle,
        n_paths=n_paths,
        n_years=n_years,
        block_size=1,
        rng=rng,
    )
async def _build_paths(req: SimulateRequest) -> np.ndarray:
    """Build the returns `paths` array for a request according to `returns_mode`.

    - "manual": constant paths from `manual_real_return_pct`; responds with
      HTTP 400 when that field is missing.
    - "wealthfolio": bootstrap of the user's own history; a `ValueError`
      from the history helper is reported as HTTP 400.
    - anything else (the "shiller" default): Shiller block-bootstrap.

    The Shiller branch does file loading and sampling synchronously, so it
    is run in a worker thread to keep the event loop responsive.
    """
    if req.returns_mode == "manual":
        if req.manual_real_return_pct is None:
            raise HTTPException(
                status_code=400,
                detail="manual_real_return_pct is required when returns_mode='manual'",
            )
        return constant_real_return_paths(
            n_paths=req.n_paths,
            n_years=req.horizon_years,
            real_return_pct=float(req.manual_real_return_pct),
        )
    if req.returns_mode == "wealthfolio":
        try:
            return await _wealthfolio_paths(req.seed, req.n_paths, req.horizon_years)
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=f"Wealthfolio history insufficient: {e}",
            ) from e
    # Default mode: blocking work, so keep it off the event loop.
    return await asyncio.to_thread(
        _shiller_paths, req.seed, req.n_paths, req.horizon_years
    )
def _project(req: SimulateRequest, paths: np.ndarray) -> tuple[SimulationResult, float]:
annual_savings = (np.full(req.horizon_years, float(req.savings_per_year_gbp), dtype=np.float64)
if req.savings_per_year_gbp > 0 else None)
floor = float(req.floor_gbp) if req.floor_gbp is not None else None
@ -120,8 +163,9 @@ def _to_response(result: SimulationResult, elapsed: float) -> SimulateResult:
@router.post("/simulate", response_model=SimulateResult)
async def simulate_one(req: SimulateRequest) -> SimulateResult:
"""Run one scenario synchronously, no DB write. ~1-3s for 5k paths."""
paths = await _build_paths(req)
try:
result, elapsed = await asyncio.to_thread(_project, req)
result, elapsed = await asyncio.to_thread(_project, req, paths)
except KeyError as e:
raise HTTPException(status_code=400, detail=f"Unknown name: {e}") from None
return _to_response(result, elapsed)
@ -131,7 +175,8 @@ async def simulate_one(req: SimulateRequest) -> SimulateResult:
async def compare_scenarios(req: CompareRequest) -> CompareResult:
"""Run 2-5 scenarios in parallel, return all results."""
async def one(s: SimulateRequest) -> SimulateResult:
result, elapsed = await asyncio.to_thread(_project, s)
paths = await _build_paths(s)
result, elapsed = await asyncio.to_thread(_project, s, paths)
return _to_response(result, elapsed)
try:

View file

@ -0,0 +1,190 @@
"""Build a `ReturnsBundle` from the user's actual portfolio history.
Reads the `wealthfolio_sync.daily_account_valuation` PG mirror, sums
`total_value` and `net_contribution` across accounts per day, and
computes the user's blended nominal portfolio return year-by-year:
daily_return_t = (total_t - total_{t-1} - delta_net_contribution_t) / total_{t-1}
annual_return_y = product(1 + daily_return) - 1 over calendar year y
The contribution-stripping is essential: a £10k deposit isn't a 5%
return on a £200k portfolio; without it the bundle would conflate
investment returns with savings.
Real returns are approximated by feeding nominal returns plus a
constant CPI assumption (default 3%/yr) into the simulator, which
already deflates with `(1 + nominal) / (1 + cpi) - 1`. For typical UK
inflation over the 2020-present window, 3% is a reasonable default;
callers can override.
Output `ReturnsBundle` sets `stock_nominal == bond_nominal == blended`
because we don't have asset-class breakdown — Wealthfolio mode treats
the user's actual portfolio mix as a single asset, so the simulator's
glide-path mixing becomes a no-op (identical to picking 100/0 or 60/40
or 0/100; all produce the same per-year return).
With ~6 years of data, block_bootstrap should be called with
block_size=1 — there's not enough sample diversity to preserve
multi-year serial correlation.
"""
from __future__ import annotations
from collections import defaultdict
from datetime import date
from decimal import Decimal
import numpy as np
import numpy.typing as npt
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from fire_planner.returns.shiller import ReturnsBundle
async def compute_annual_returns_from_pg(
    wf_session: AsyncSession,
    cpi_assumption_pct: float = 0.03,
    drop_partial_years: bool = True,
) -> ReturnsBundle:
    """Aggregate wealthfolio_sync to a per-year `ReturnsBundle`.

    Steps:
      1. Pull (valuation_date, total_value, net_contribution) per row,
         excluding the 'TOTAL' pseudo-account, and convert to base
         currency via fx_rate_to_base (treated as 1.0 when NULL).
      2. Aggregate per date across all accounts.
      3. Per calendar year, compound daily nominal returns (with the
         day's change in net contribution stripped out) into one annual
         return. Days whose previous total is zero or negative are
         skipped (early account-onboarding edge cases), as is the very
         first day, which has no prior baseline.
      4. Drop partial years (first observation after January or last
         observation before December) when `drop_partial_years=True`, to
         avoid presenting a 5-month sample as a full-year number.

    Args:
        wf_session: async session bound to the wealthfolio_sync mirror.
        cpi_assumption_pct: constant CPI written to every entry, as a
            fraction (the default 0.03 is the 3%/yr assumption).
        drop_partial_years: apply the Jan/Dec coverage filter of step 4.

    Returns:
        A ReturnsBundle with stock = bond = annual nominal returns and
        cpi = `cpi_assumption_pct` for every entry, ordered by year.

    Raises:
        ValueError: if the table yields no rows, or if no annual sample
            survives the partial-year filter. A single surviving year is
            accepted.
    """
    rows = (await wf_session.execute(
        text("""
            SELECT valuation_date,
                   total_value,
                   net_contribution,
                   COALESCE(fx_rate_to_base, 1.0) AS fx,
                   account_id
            FROM daily_account_valuation
            WHERE account_id != 'TOTAL'
            ORDER BY valuation_date, account_id
        """))).all()
    if not rows:
        raise ValueError("daily_account_valuation is empty")

    # Per date: [total_value, net_contribution], both in base currency and
    # summed across accounts.
    by_date: dict[date, list[float]] = defaultdict(lambda: [0.0, 0.0])
    for valuation_date, total_value, net_contrib, fx, _ in rows:
        if total_value is None:
            continue
        # SQLite returns dates as strings; PG returns datetime.date.
        d_obj = (valuation_date if isinstance(valuation_date, date) else
                 date.fromisoformat(str(valuation_date)))
        fx_f = float(fx)
        totals = by_date[d_obj]
        totals[0] += float(_dec(total_value)) * fx_f
        if net_contrib is not None:
            totals[1] += float(_dec(net_contrib)) * fx_f

    # Walk the dates in order, compounding each day's return into the
    # calendar year the day falls in, and remember the first/last
    # observation date of every year for the partial-year filter.
    growth: dict[int, float] = {}
    year_first_date: dict[int, date] = {}
    year_last_date: dict[int, date] = {}
    prev_total: float | None = None
    prev_nc = 0.0
    for d in sorted(by_date):
        cur_total, cur_nc = by_date[d]
        y = d.year
        if y not in growth:
            growth[y] = 1.0
            year_first_date[y] = d
        # Overwritten on every observation, so it ends on the year's last one.
        year_last_date[y] = d
        if prev_total is not None and prev_total > 0:
            # Strip the contribution delta so deposits don't count as return.
            delta_nc = cur_nc - prev_nc
            day_return = (cur_total - prev_total - delta_nc) / prev_total
            growth[y] *= 1 + day_return
        prev_total = cur_total
        prev_nc = cur_nc

    def _covers_full_year(y: int) -> bool:
        # Require the year span to touch Jan and Dec — coarse but cheap.
        return year_first_date[y].month == 1 and year_last_date[y].month == 12

    # Convert from cumulative product to return, dropping partial years.
    samples: list[tuple[int, float]] = [
        (y, growth[y] - 1.0)
        for y in sorted(growth)
        if not drop_partial_years or _covers_full_year(y)
    ]
    if not samples:
        raise ValueError(
            "No complete years of wealthfolio data found. "
            "Try drop_partial_years=False or wait for more data."
        )

    years = np.array([y for y, _ in samples], dtype=np.int32)
    rets = np.array([r for _, r in samples], dtype=np.float64)
    cpi = np.full_like(rets, cpi_assumption_pct, dtype=np.float64)
    return ReturnsBundle(
        years=years,
        stock_nominal=rets,
        bond_nominal=rets.copy(),
        cpi=cpi,
    )
def _dec(v: object) -> Decimal:
"""Decimal coercion that handles strings + Decimals + floats."""
if isinstance(v, Decimal):
return v
return Decimal(str(v))
def constant_real_return_paths(
    n_paths: int,
    n_years: int,
    real_return_pct: float,
) -> npt.NDArray[np.float64]:
    """Manual mode: every year of every path = `real_return_pct` real.

    Returns an array of shape (n_paths, n_years, 3) whose last axis is
    (stock_nominal, bond_nominal, cpi). Stock and bond both carry
    `real_return_pct` and cpi is 0, so the simulator's
    `(1+nominal)/(1+cpi)-1` deflation reduces to exactly
    `real_return_pct`. No randomness, no fan — every path is identical.
    """
    paths = np.full((n_paths, n_years, 3), real_return_pct, dtype=np.float64)
    # The nominal columns are already real, so inflation must be zero.
    paths[:, :, 2] = 0.0
    return paths