col: simulator auto-adjusts spending to local prices via Numbeo+Expatistan
The Monte Carlo used to compare jurisdictions at a flat London-equivalent spend, which silently overstated the cost-of-living for any move to a cheaper region. Now every cross-jurisdiction simulation auto-scales spending_gbp by the real Numbeo/Expatistan ratio between the user's baseline city and the target city.

Architecture:
- fire_planner/col/baseline.py — 22 cities with headline Numbeo data (source URLs + snapshot dates embedded) — fallback when scraper fails
- col/numbeo.py + col/expatistan.py — httpx async scrapers, regex-parsed, polite 1.1s rate-limit, EUR/USD anchored
- col/cache.py — PG-backed cache (col_snapshot table, 1-year TTL)
- col/service.py — sync compute_col_ratio() for the simulator; async lookup_city_cached() with source reconciliation for the refresh CronJob
- alembic 0005 — col_snapshot table, UNIQUE(city_slug, source_name)

Simulator wiring:
- SimulateRequest gains col_auto_adjust=True (default), col_baseline_city, col_target_city. Defaults pick the jurisdiction's representative city.
- _resolve_col_adjustment scales spending_gbp before path-building.
- SimulateResult surfaces col_multiplier_applied + col_adjusted_spending_gbp.

CLIs:
- python -m fire_planner col-seed — loads BASELINES into col_snapshot (post-migration seed step)
- python -m fire_planner col-refresh-stale --within-days 7 — used by the weekly fire-planner-col-refresh CronJob

268 tests pass. Mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
70101c836c
commit
e72fd22a17
14 changed files with 1641 additions and 6 deletions
|
|
@ -57,6 +57,103 @@ def migrate() -> None:
|
|||
sys.exit(rc.returncode)
|
||||
|
||||
|
||||
@cli.command("col-seed")
@click.option(
    "--ttl-days",
    type=int,
    default=365,
    help="Cache TTL in days (default 365 — matches Viktor's 1y choice).",
)
def col_seed(ttl_days: int) -> None:
    """Seed `col_snapshot` from baseline.py BASELINES.

    Idempotent — uses upsert on (city_slug, source_name). Run once after
    the alembic migration creates the table. Subsequent live-scrape
    refreshes (Phase 3 CronJob) supersede these rows; the baseline
    fallback remains as a last-resort source.
    """
    # The docstring above doubles as the command's --help text, so it is
    # kept verbatim. The real work is async; this sync entry point only
    # drives the coroutine to completion on a fresh event loop.
    seed_job = _col_seed(ttl_days)
    asyncio.run(seed_job)
|
||||
|
||||
|
||||
async def _col_seed(ttl_days: int) -> None:
    """Upsert every entry of ``BASELINES`` into the COL cache.

    Each baseline entry is copied with its source name replaced by
    ``"baseline"`` and handed to the cache ``upsert`` with the given TTL.
    One progress line is echoed per city, followed by a summary line.

    Args:
        ttl_days: TTL (in days) forwarded to the cache upsert for each row.
    """
    from fire_planner.col.baseline import BASELINES
    from fire_planner.col.cache import upsert as col_upsert

    db_engine = create_engine_from_env()
    session_factory = make_session_factory(db_engine)
    try:
        async with session_factory() as session:
            for city_slug, city_index in BASELINES.items():
                # Tag the source as `baseline` rather than `numbeo` so a
                # later live scrape (source_name='numbeo') doesn't conflict
                # on the (city_slug, source_name) unique constraint.
                baseline_source = city_index.source.model_copy(update={"name": "baseline"})
                seeded_row = city_index.model_copy(update={"source": baseline_source})
                await col_upsert(session, seeded_row, ttl_days=ttl_days)
                click.echo(
                    f" seeded {city_slug:20s} total={city_index.total_single_with_rent_gbp} GBP"
                )
    finally:
        # Always release the engine, even if an upsert raised.
        await db_engine.dispose()
    click.echo(f"\ncol-seed: {len(BASELINES)} cities upserted (ttl_days={ttl_days}).")
|
||||
|
||||
|
||||
@cli.command("col-refresh-stale")
@click.option(
    "--within-days",
    type=int,
    default=7,
    help="Refresh rows whose expires_at is within this many days.",
)
@click.option(
    "--ttl-days",
    type=int,
    default=365,
    help="TTL for re-written rows (default 365).",
)
def col_refresh_stale(within_days: int, ttl_days: int) -> None:
    """Re-scrape COL rows that are within `within_days` of expiry.

    Designed for the weekly CronJob. Walks every distinct city_slug in
    `col_snapshot` whose newest row will expire within the window,
    calls Numbeo+Expatistan via `service.lookup_city_cached`, which
    upserts the result. Idempotent — no-op for fresh rows.
    """
    # The docstring above doubles as the command's --help text, so it is
    # kept verbatim. The real work is async; this sync entry point only
    # drives the coroutine to completion on a fresh event loop.
    refresh_job = _col_refresh_stale(within_days, ttl_days)
    asyncio.run(refresh_job)
|
||||
|
||||
|
||||
async def _col_refresh_stale(within_days: int, ttl_days: int) -> None:
    """Refresh cached COL rows that expire within ``within_days`` days.

    Selects the distinct ``(city_slug, country)`` pairs from ``ColSnapshot``
    that have a row with ``expires_at <= NOW() + within_days`` and calls
    ``lookup_city_cached`` once per pair. A failure for one city is echoed
    to stderr and counted; the loop then continues with the next city.

    Args:
        within_days: Size of the look-ahead window, in days.
        ttl_days: Echoed in the final summary line.
            NOTE(review): this value is not passed to ``lookup_city_cached``
            here, so it does not influence the TTL of re-written rows from
            this function — confirm where the TTL is actually applied.
    """
    from sqlalchemy import select, text

    from fire_planner.col.service import lookup_city_cached
    from fire_planner.db import ColSnapshot

    engine = create_engine_from_env()
    factory = make_session_factory(engine)
    # int() ensures only digits are interpolated into the raw SQL fragment.
    threshold = f"NOW() + INTERVAL '{int(within_days)} days'"
    refreshed = 0
    failed = 0
    try:
        async with factory() as sess:
            # Distinct (city_slug, country) pairs having a row that expires
            # within the window.
            # NOTE(review): this matches on any row for the city, not only
            # its freshest one — confirm that is the intended selection.
            stmt = (
                select(ColSnapshot.city_slug, ColSnapshot.country)
                .distinct()
                .where(text(f"expires_at <= {threshold}"))
            )
            rows = (await sess.execute(stmt)).all()
            click.echo(f"col-refresh-stale: {len(rows)} city(ies) need refresh "
                       f"(within_days={within_days})")
            for slug, country in rows:
                try:
                    # lookup_city_cached upserts on cache miss, which is
                    # what "stale" means here — read_fresh returns None.
                    # NOTE(review): rows inside the window may not have
                    # expired yet; verify they are treated as a miss.
                    idx = await lookup_city_cached(sess, slug, country=country or "")
                    click.echo(f" refreshed {slug:20s} → {idx.source.name:10s} "
                               f"total={idx.total_single_with_rent_gbp}")
                    refreshed += 1
                except Exception as e:  # broad — log and continue per-city
                    click.echo(f" FAILED {slug}: {e}", err=True)
                    failed += 1
                    # A failed statement can leave the shared session in an
                    # unusable transaction state; roll back so one bad city
                    # does not make every remaining city fail as well.
                    # Assumes lookup_city_cached commits its own successful
                    # writes (no commit is issued in this function).
                    try:
                        await sess.rollback()
                    except Exception as rollback_err:  # stay best-effort
                        click.echo(f" rollback failed after {slug}: {rollback_err}",
                                   err=True)
    finally:
        # Always release the engine, even if the query itself raised.
        await engine.dispose()
    click.echo(f"\ncol-refresh-stale done: refreshed={refreshed} failed={failed} "
               f"ttl_days={ttl_days}")
|
||||
|
||||
|
||||
@cli.command("ingest")
|
||||
@click.option("--source",
|
||||
type=click.Choice(["wealthfolio"]),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue