col: simulator auto-adjusts spending to local prices via Numbeo+Expatistan

The Monte Carlo used to compare jurisdictions at a flat London-equivalent spend, which silently overstated the cost of living for any move to a cheaper region. Now every cross-jurisdiction simulation auto-scales spending_gbp by the real Numbeo/Expatistan ratio between the user's baseline city and the target city.

Architecture:
- fire_planner/col/baseline.py — 22 cities with headline Numbeo data (source URLs + snapshot dates embedded) — fallback when scraper fails
- col/numbeo.py + col/expatistan.py — httpx async scrapers, regex-parsed, polite 1.1s rate-limit, EUR/USD anchored
- col/cache.py — PG-backed cache (col_snapshot table, 1-year TTL)
- col/service.py — sync compute_col_ratio() for the simulator; async lookup_city_cached() with source reconciliation for the refresh CronJob
- alembic 0005 — col_snapshot table, UNIQUE(city_slug, source_name)

Simulator wiring:
- SimulateRequest gains col_auto_adjust=True (default), col_baseline_city, col_target_city. Defaults pick the jurisdiction's representative city.
- _resolve_col_adjustment scales spending_gbp before path-building.
- SimulateResult surfaces col_multiplier_applied + col_adjusted_spending_gbp + col_target_city.

CLIs:
- python -m fire_planner col-seed — loads BASELINES into col_snapshot (post-migration seed step)
- python -m fire_planner col-refresh-stale --within-days 7 — used by the weekly fire-planner-col-refresh CronJob

268 tests pass. Mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 14:14:57 +00:00 · 2026-05-22 14:14:57 +00:00 · e72fd22a17
commit e72fd22a17
parent 70101c836c
14 changed files with 1641 additions and 6 deletions
--- a/fire_planner/api/simulate.py
+++ b/fire_planner/api/simulate.py
@ -26,6 +26,7 @@ from fire_planner.api.schemas import (
    SimulateRequest,
    SimulateResult,
 )
+from fire_planner.col import compute_col_ratio, representative_city_for
 from fire_planner.flex_spending import FlexRule as EngineFlexRule
 from fire_planner.glide_path import static
 from fire_planner.goals_eval import evaluate_goals
@ -50,6 +51,36 @@ router = APIRouter(tags=["simulate"])
 _RETURNS_CSV = Path("/data/shiller_returns.csv")


+def _resolve_col_adjustment(
+    req: SimulateRequest,
+) -> tuple[SimulateRequest, Decimal | None, Decimal | None, str | None]:
+    """Apply cost-of-living adjustment to `req.spending_gbp` when enabled.
+
+    Returns the (possibly modified) request, the multiplier applied (or
+    None), the post-adjustment spending GBP (or None), and the resolved
+    target city slug (or None). Skipped silently when:
+    - col_auto_adjust is False
+    - the jurisdiction has no representative city (e.g. nomad)
+    - baseline_city == resolved target city (identity transform)
+    - either city is unknown to the baseline lookup (degrade gracefully
+      rather than 400 — a future Phase-2 scraper will close the gap)
+    """
+    if not req.col_auto_adjust:
+        return req, None, None, None
+    target = req.col_target_city or representative_city_for(req.jurisdiction)  # a truthy explicit target wins; otherwise use the jurisdiction's representative city
+    if target is None:
+        return req, None, None, None
+    if target == req.col_baseline_city:
+        return req, None, None, target  # same city: nothing to scale, but the resolved target is still returned
+    try:
+        ratio = compute_col_ratio(req.col_baseline_city, target)
+    except KeyError:
+        return req, None, None, target  # ratio lookup raised KeyError: leave spending unscaled instead of failing the request
+    adjusted_spend = req.spending_gbp * ratio
+    adjusted_req = req.model_copy(update={"spending_gbp": adjusted_spend})  # NOTE(review): presumably pydantic's model_copy (new object, `update` values not re-validated) — confirm
+    return adjusted_req, ratio, adjusted_spend, target
+
+
 def _shiller_paths(seed: int, n_paths: int, n_years: int) -> np.ndarray:
    bundle = (load_from_csv(_RETURNS_CSV) if _RETURNS_CSV.exists() else synthetic_returns(seed=42))
    rng = np.random.default_rng(seed)
@ -193,6 +224,9 @@ def _to_response(
    result: SimulationResult,
    elapsed: float,
    req: SimulateRequest | None = None,
+    col_multiplier: Decimal | None = None,
+    col_adjusted_spend: Decimal | None = None,
+    col_target_city: str | None = None,
 ) -> SimulateResult:
    # portfolio_real has n_years+1 columns (year 0 = seed, year k = end-of-year k).
    # withdrawal_real / tax_real have n_years columns (year k = withdrawn in year k+1).
@ -243,27 +277,34 @@ def _to_response(
        elapsed_seconds=Decimal(str(round(elapsed, 3))),
        yearly=yearly,
        goals_probability=goals_probability,
+        col_multiplier_applied=(Decimal(str(round(float(col_multiplier), 6)))
+                                if col_multiplier is not None else None),
+        col_adjusted_spending_gbp=(Decimal(str(round(float(col_adjusted_spend), 2)))
+                                   if col_adjusted_spend is not None else None),
+        col_target_city=col_target_city,
    )


@router.post("/simulate", response_model=SimulateResult)
 async def simulate_one(req: SimulateRequest) -> SimulateResult:
    """Run one scenario synchronously, no DB write. ~1-3s for 5k paths."""
-    paths = await _build_paths(req)
+    adjusted_req, mult, adj_spend, target_city = _resolve_col_adjustment(req)
+    paths = await _build_paths(adjusted_req)
    try:
-        result, elapsed = await asyncio.to_thread(_project, req, paths)
+        result, elapsed = await asyncio.to_thread(_project, adjusted_req, paths)
    except KeyError as e:
        raise HTTPException(status_code=400, detail=f"Unknown name: {e}") from None
-    return _to_response(result, elapsed, req)
+    return _to_response(result, elapsed, adjusted_req, mult, adj_spend, target_city)


@router.post("/compare", response_model=CompareResult)
 async def compare_scenarios(req: CompareRequest) -> CompareResult:
    """Run 2-5 scenarios in parallel, return all results."""
    async def one(s: SimulateRequest) -> SimulateResult:
-        paths = await _build_paths(s)
-        result, elapsed = await asyncio.to_thread(_project, s, paths)
-        return _to_response(result, elapsed, s)
+        adjusted_s, mult, adj_spend, target_city = _resolve_col_adjustment(s)
+        paths = await _build_paths(adjusted_s)
+        result, elapsed = await asyncio.to_thread(_project, adjusted_s, paths)
+        return _to_response(result, elapsed, adjusted_s, mult, adj_spend, target_city)

    try:
        results = await asyncio.gather(*(one(s) for s in req.scenarios))