The Monte Carlo used to compare jurisdictions at a flat London-equivalent spend, which silently overstated the cost-of-living for any move to a cheaper region. Now every cross-jurisdiction simulation auto-scales spending_gbp by the real Numbeo/Expatistan ratio between the user's baseline city and the target city.

Architecture:
- fire_planner/col/baseline.py — 22 cities with headline Numbeo data (source URLs + snapshot dates embedded) — fallback when scraper fails
- col/numbeo.py + col/expatistan.py — httpx async scrapers, regex-parsed, polite 1.1s rate-limit, EUR/USD anchored
- col/cache.py — PG-backed cache (col_snapshot table, 1-year TTL)
- col/service.py — sync compute_col_ratio() for the simulator; async lookup_city_cached() with source reconciliation for the refresh CronJob
- alembic 0005 — col_snapshot table, UNIQUE(city_slug, source_name)

Simulator wiring:
- SimulateRequest gains col_auto_adjust=True (default), col_baseline_city, col_target_city. Defaults pick the jurisdiction's representative city.
- _resolve_col_adjustment scales spending_gbp before path-building.
- SimulateResult surfaces col_multiplier_applied + col_adjusted_spending_gbp.

CLIs:
- python -m fire_planner col-seed — loads BASELINES into col_snapshot (post-migration seed step)
- python -m fire_planner col-refresh-stale --within-days 7 — used by the weekly fire-planner-col-refresh CronJob

268 tests pass. Mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
73 lines
2.8 KiB
Python
73 lines
2.8 KiB
Python
"""add col_snapshot table for cached cost-of-living data

Revision ID: 0005
Revises: 0004
Create Date: 2026-05-21 12:00:00.000000

Phase 2 of the cost-of-living subsystem (`fire_planner.col`). Caches
Numbeo / Expatistan headline data with a 1-year TTL so the simulator
can scale `spending_gbp` to local prices without re-scraping per-call.
Refresh is async (Phase-3 CronJob); user-facing lookups never block on
the network in the steady state.

Unique on (city_slug, source_name) — multiple sources per city are
allowed; service.py reconciles them when computing the headline.
"""
|
|
from collections.abc import Sequence
|
|
|
|
import sqlalchemy as sa
|
|
|
|
from alembic import op
|
|
|
|
revision: str = "0005"
|
|
down_revision: str | None = "0004"
|
|
branch_labels: str | Sequence[str] | None = None
|
|
depends_on: str | Sequence[str] | None = None
|
|
|
|
SCHEMA = "fire_planner"
|
|
|
|
|
|
def upgrade() -> None:
    """Create the ``col_snapshot`` table and its two lookup indexes.

    The table is created in the schema named by ``SCHEMA`` and holds one row
    per ``(city_slug, source_name)`` pair, enforced by the
    ``uq_col_snapshot_city_source`` unique constraint. Non-unique indexes are
    then added on ``city_slug`` and on ``expires_at``.
    """
    op.create_table(
        "col_snapshot",
        sa.Column("id", sa.Integer(), nullable=False, autoincrement=True),
        # City identity.
        sa.Column("city_slug", sa.String(length=64), nullable=False),
        sa.Column("city_display", sa.String(length=128), nullable=False),
        sa.Column("country", sa.String(length=64), nullable=False),
        # Provenance of the snapshot; the URL is optional.
        sa.Column("source_name", sa.String(length=32), nullable=False),
        sa.Column("source_url", sa.String(), nullable=True),
        sa.Column("snapshot_date", sa.Date(), nullable=False),
        # fetched_at defaults to the database clock; expires_at has no default,
        # so every insert must supply it explicitly.
        sa.Column("fetched_at", sa.TIMESTAMP(timezone=True), nullable=False,
                  server_default=sa.func.now()),
        sa.Column("expires_at", sa.TIMESTAMP(timezone=True), nullable=False),
        # Headline amounts; only the outside-centre rent figure may be NULL.
        sa.Column("total_no_rent_gbp", sa.Numeric(12, 2), nullable=False),
        sa.Column("total_with_rent_gbp", sa.Numeric(12, 2), nullable=False),
        sa.Column("rent_1bed_center_gbp", sa.Numeric(12, 2), nullable=False),
        sa.Column("rent_1bed_outside_gbp", sa.Numeric(12, 2), nullable=True),
        # Defaults describe a row that is already in GBP ('GBP', rate 1).
        # NOTE(review): presumably gbp_per_unit is the raw_currency -> GBP
        # conversion rate used at fetch time; confirm against the col package.
        sa.Column("raw_currency", sa.String(length=3), nullable=False,
                  server_default=sa.text("'GBP'")),
        sa.Column("gbp_per_unit", sa.Numeric(12, 8), nullable=False,
                  server_default=sa.text("1")),
        sa.Column("by_category_json", sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("city_slug", "source_name", name="uq_col_snapshot_city_source"),
        schema=SCHEMA,
    )
    # NOTE(review): the unique constraint above already leads with city_slug,
    # so this single-column index may be redundant on PostgreSQL; confirm
    # against the ORM model before removing it.
    op.create_index(
        "ix_col_snapshot_city_slug",
        "col_snapshot",
        ["city_slug"],
        schema=SCHEMA,
    )
    op.create_index(
        "ix_col_snapshot_expires_at",
        "col_snapshot",
        ["expires_at"],
        schema=SCHEMA,
    )
|
|
|
|
|
|
def downgrade() -> None:
    """Drop the ``col_snapshot`` indexes, then the table itself.

    Removes ``ix_col_snapshot_expires_at`` and ``ix_col_snapshot_city_slug``
    before dropping ``col_snapshot`` from the schema named by ``SCHEMA``.
    """
    op.drop_index("ix_col_snapshot_expires_at", table_name="col_snapshot", schema=SCHEMA)
    op.drop_index("ix_col_snapshot_city_slug", table_name="col_snapshot", schema=SCHEMA)
    op.drop_table("col_snapshot", schema=SCHEMA)
|