"""Daily market-trend aggregator. Two outputs per run: 1. Per-listing trend columns. For each row in RentListing / BuyListing we parse `price_history_json` and find the price entry whose `last_seen` was closest to `lookback_days` ago. The current price and that historical price land on `price_14d_ago` / `price_change_pct_14d` for the PropertyCard badge to render. 2. Aggregate market snapshot. For each configured (listing_type, bedroom band) we compute median/mean/count over the CURRENT listing pool and upsert one row in `dailylistingaggregate` keyed on today's date. The `MarketTrendStrip` UI consumes these rows. Both steps are idempotent — re-running on the same day refreshes the snapshot rather than appending. Designed to fire daily ~04:00 UTC (1h after the 03:00 RENT scrape so the data is fresh). """ from __future__ import annotations import json import logging import time from datetime import datetime, timedelta from statistics import mean, median from typing import Iterable from sqlalchemy import Engine from sqlmodel import Session, select from models.listing import ( BuyListing, DailyListingAggregate, PriceHistoryItem, RentListing, ) logger = logging.getLogger("uvicorn") # Default scope: the user's daily filter (1-2 bed, both listing types). DEFAULT_BEDROOM_BANDS: tuple[tuple[int, int], ...] = ((1, 2),) DEFAULT_LISTING_TYPES: tuple[str, ...] = ("RENT", "BUY") # Trend lookback window for the per-listing badge. Surfaces price moves # that happened in the last fortnight (long enough for prices to actually # settle, short enough to feel current). 
DEFAULT_LOOKBACK_DAYS = 14

# Conflict target and refreshed columns of the aggregate upsert.
_AGGREGATE_KEY_COLUMNS: tuple[str, ...] = (
    "snapshot_date",
    "listing_type",
    "min_bedrooms",
    "max_bedrooms",
)
_AGGREGATE_STAT_COLUMNS: tuple[str, ...] = (
    "listing_count",
    "median_total_price",
    "median_qmprice",
    "mean_total_price",
    "mean_qmprice",
)


def _utcnow() -> datetime:
    """Return the current UTC time as a naive `datetime`.

    Stands in for `datetime.utcnow()`, which is deprecated since Python 3.12,
    while producing the same kind of value (naive, UTC wall clock).
    """
    # Function-scope import so the module-level import block stays untouched.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


def _as_naive_utc(moment: datetime) -> datetime:
    """Return `moment` as a naive UTC datetime.

    Naive input is returned unchanged (it is taken to be UTC already, matching
    the naive-UTC "now" used throughout this module). Aware input is shifted
    by its UTC offset and stripped of its tzinfo.
    """
    offset = moment.utcoffset()
    if offset is None:
        return moment
    return (moment - offset).replace(tzinfo=None)


def _is_positive_finite(value: object) -> bool:
    """Return True when `value` is an int/float strictly between 0 and +inf.

    NaN, infinities, `None`, booleans and non-numeric objects are rejected.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    # NaN fails both comparisons, +inf fails the upper bound.
    return 0 < value < float("inf")


def _parse_history(price_history_json: str | None) -> list[PriceHistoryItem]:
    """Decode `price_history_json` into a list of `PriceHistoryItem`.

    Returns an empty list when the input is empty, is not valid JSON, or does
    not decode to a JSON array. Individual entries that lack a key or carry an
    unparsable value are skipped; the remaining entries keep their order.
    """
    if not price_history_json:
        return []
    try:
        raw = json.loads(price_history_json)
    except (ValueError, TypeError):
        return []
    # A payload such as `null` or `5` is valid JSON but not iterable; treat it
    # like any other unusable history instead of crashing the whole job.
    if not isinstance(raw, list):
        return []

    out: list[PriceHistoryItem] = []
    for item in raw:
        try:
            out.append(
                PriceHistoryItem(
                    first_seen=datetime.fromisoformat(item["first_seen"]),
                    last_seen=datetime.fromisoformat(item["last_seen"]),
                    price=float(item["price"]),
                )
            )
        except (KeyError, ValueError, TypeError):
            continue
    return out


def _price_at_or_before(
    history: list[PriceHistoryItem], cutoff: datetime
) -> float | None:
    """Return the price of the entry whose `last_seen` is closest to (but not
    after) `cutoff`. Returns None if no entry that old exists.

    The whole history is scanned, so the result does not depend on the entries
    being in chronological order; on equal `last_seen` the later list entry
    wins. Naive and aware timestamps are both accepted and compared as UTC.

    NOTE(review): matching is on `last_seen` only, so an entry that was still
    live at `cutoff` (first_seen <= cutoff < last_seen) is ignored — confirm
    that this is the intended meaning of "price N days ago".
    """
    limit = _as_naive_utc(cutoff)
    best_seen: datetime | None = None
    best_price: float | None = None
    for item in history:
        seen = _as_naive_utc(item.last_seen)
        if seen > limit:
            continue
        if best_seen is None or seen >= best_seen:
            best_seen, best_price = seen, item.price
    return best_price


def compute_trend_for_listing(
    price_history_json: str | None,
    current_price: float | None,
    *,
    lookback_days: int = DEFAULT_LOOKBACK_DAYS,
    now: datetime | None = None,
) -> tuple[float | None, float | None]:
    """Return `(price_n_days_ago, change_pct)` for one listing.

    `change_pct` is `(current - past) / past * 100` rounded to 2dp; positive =
    price went up, negative = down. Both are None when there's no entry that
    old in history, or when the current or historical price is not a positive
    finite number.

    `now` overrides the reference time (defaults to the current UTC time).
    """
    if not _is_positive_finite(current_price):
        return None, None

    reference = now if now is not None else _utcnow()
    cutoff = reference - timedelta(days=lookback_days)
    past = _price_at_or_before(_parse_history(price_history_json), cutoff)
    if not _is_positive_finite(past):
        return None, None

    pct = round((current_price - past) / past * 100.0, 2)
    return past, pct


def update_per_listing_trend(
    engine: Engine,
    *,
    lookback_days: int = DEFAULT_LOOKBACK_DAYS,
    batch_size: int = 1000,
    now: datetime | None = None,
) -> dict[str, int]:
    """Walk every RentListing + BuyListing, recompute trend columns, write.

    Rows are read in pages of `batch_size`, ordered by primary key so that
    OFFSET/LIMIT paging visits each row exactly once. Only rows whose trend
    values actually changed are written; each page is committed on its own.

    Returns `{"rent_updated": n, "buy_updated": m}` with the number of rows
    changed per model.

    Raises:
        ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    counts = {"rent_updated": 0, "buy_updated": 0}
    t0 = time.monotonic()
    for model_name, model in (("rent", RentListing), ("buy", BuyListing)):
        counter_key = f"{model_name}_updated"
        # Without a deterministic ORDER BY the database may return pages in
        # any order, which lets rows be skipped or processed twice.
        pk_columns = tuple(model.__table__.primary_key.columns)
        with Session(engine) as session:
            offset = 0
            while True:
                stmt = (
                    select(model)
                    .order_by(*pk_columns)
                    .offset(offset)
                    .limit(batch_size)
                )
                rows = list(session.exec(stmt).all())
                if not rows:
                    break
                for row in rows:
                    past, pct = compute_trend_for_listing(
                        row.price_history_json,
                        row.price,
                        lookback_days=lookback_days,
                        now=now,
                    )
                    if row.price_14d_ago != past or row.price_change_pct_14d != pct:
                        row.price_14d_ago = past
                        row.price_change_pct_14d = pct
                        session.add(row)
                        counts[counter_key] += 1
                session.commit()
                if len(rows) < batch_size:
                    break
                offset += batch_size
    logger.info(
        "Per-listing trend updated in %.1fs: rent=%d buy=%d (lookback=%dd)",
        time.monotonic() - t0,
        counts["rent_updated"],
        counts["buy_updated"],
        lookback_days,
    )
    return counts


def _stats(values: Iterable[float]) -> dict[str, float | None]:
    """Median + mean over the valid positive entries; null for empty input.

    An entry is valid when it is a positive, finite int/float. The result has
    the keys `median`, `mean` (rounded to 2dp) and `count`.
    """
    finite = [v for v in values if _is_positive_finite(v)]
    if not finite:
        return {"median": None, "mean": None, "count": 0}
    return {
        "median": float(median(finite)),
        "mean": round(float(mean(finite)), 2),
        "count": len(finite),
    }


def _build_aggregate_upsert(dialect: str, values: dict[str, object]):
    """Build the dialect-specific upsert statement for one aggregate row.

    MySQL gets `INSERT ... ON DUPLICATE KEY UPDATE`; PostgreSQL and every
    other dialect (SQLite construct) get `INSERT ... ON CONFLICT DO UPDATE`
    targeting `_AGGREGATE_KEY_COLUMNS`. In both forms only the columns in
    `_AGGREGATE_STAT_COLUMNS` are refreshed on conflict.
    """
    if dialect == "mysql":
        from sqlalchemy.dialects.mysql import insert as mysql_insert

        stmt = mysql_insert(DailyListingAggregate).values(**values)
        return stmt.on_duplicate_key_update(
            **{col: stmt.inserted[col] for col in _AGGREGATE_STAT_COLUMNS}
        )

    if dialect == "postgresql":
        from sqlalchemy.dialects.postgresql import insert as conflict_insert
    else:
        from sqlalchemy.dialects.sqlite import insert as conflict_insert

    stmt = conflict_insert(DailyListingAggregate).values(**values)
    return stmt.on_conflict_do_update(
        index_elements=list(_AGGREGATE_KEY_COLUMNS),
        set_={col: stmt.excluded[col] for col in _AGGREGATE_STAT_COLUMNS},
    )


def compute_aggregate_snapshot(
    engine: Engine,
    *,
    listing_types: tuple[str, ...] = DEFAULT_LISTING_TYPES,
    bedroom_bands: tuple[tuple[int, int], ...] = DEFAULT_BEDROOM_BANDS,
    snapshot_date: datetime | None = None,
) -> list[DailyListingAggregate]:
    """Compute one aggregate row per (listing_type * bedroom band) and upsert
    it onto today's `snapshot_date`. Returns the persisted rows.

    The write is a dialect-specific upsert (`ON DUPLICATE KEY UPDATE` on
    MySQL, `ON CONFLICT DO UPDATE` otherwise), so re-running on the same day
    refreshes the row in place — no duplicates, no DELETE.

    `snapshot_date` defaults to the current UTC day; whatever value is used is
    truncated to midnight so that one day always maps to one key. Listings
    without a usable price are left out of the price-per-square-metre figures
    instead of aborting the run.
    """
    base = snapshot_date if snapshot_date is not None else _utcnow()
    today = base.replace(hour=0, minute=0, second=0, microsecond=0)

    models_by_type = {"RENT": RentListing, "BUY": BuyListing}
    written: list[DailyListingAggregate] = []
    dialect = engine.dialect.name
    with Session(engine) as session:
        for listing_type in listing_types:
            model = models_by_type.get(listing_type)
            if model is None:
                # Legacy behaviour: anything that is not "RENT" reads the buy
                # table. Keep it, but make the likely typo visible.
                logger.warning(
                    "Unknown listing_type %r; falling back to BuyListing",
                    listing_type,
                )
                model = BuyListing
            for min_bed, max_bed in bedroom_bands:
                stmt = select(model.price, model.square_meters).where(
                    model.number_of_bedrooms >= min_bed,
                    model.number_of_bedrooms <= max_bed,
                )
                rows = list(session.exec(stmt).all())
                prices = [price for price, _ in rows]
                # Guard both operands: a NULL price used to raise TypeError.
                qmprices = [
                    price / sqm
                    for price, sqm in rows
                    if _is_positive_finite(price) and _is_positive_finite(sqm)
                ]
                price_stats = _stats(prices)
                qm_stats = _stats(qmprices)
                values = {
                    "snapshot_date": today,
                    "listing_type": listing_type,
                    "min_bedrooms": min_bed,
                    "max_bedrooms": max_bed,
                    "listing_count": price_stats["count"],
                    "median_total_price": price_stats["median"],
                    "median_qmprice": qm_stats["median"],
                    "mean_total_price": price_stats["mean"],
                    "mean_qmprice": qm_stats["mean"],
                }
                session.execute(_build_aggregate_upsert(dialect, values))
                session.commit()

                # Read the row back so callers receive the persisted entity.
                row = session.exec(
                    select(DailyListingAggregate).where(
                        DailyListingAggregate.snapshot_date == today,
                        DailyListingAggregate.listing_type == listing_type,
                        DailyListingAggregate.min_bedrooms == min_bed,
                        DailyListingAggregate.max_bedrooms == max_bed,
                    )
                ).first()
                if row is not None:
                    written.append(row)
                logger.info(
                    "Aggregate %s %d-%d on %s: count=%s median=%s/%s mean=%s/%s",
                    listing_type,
                    min_bed,
                    max_bed,
                    today.date(),
                    price_stats["count"],
                    price_stats["median"],
                    qm_stats["median"],
                    price_stats["mean"],
                    qm_stats["mean"],
                )
    return written


def fetch_trend_series(
    engine: Engine,
    *,
    listing_type: str,
    min_bedrooms: int,
    max_bedrooms: int,
    days: int = 30,
) -> list[DailyListingAggregate]:
    """Return the aggregate rows for the last `days` days, ordered ascending
    by date. Empty list when no rows match — the strip handles that case.

    The window starts `days` * 24h before the current UTC time and matches
    rows whose `snapshot_date` is at or after that instant.
    """
    cutoff = _utcnow() - timedelta(days=days)
    with Session(engine) as session:
        stmt = (
            select(DailyListingAggregate)
            .where(
                DailyListingAggregate.listing_type == listing_type,
                DailyListingAggregate.min_bedrooms == min_bedrooms,
                DailyListingAggregate.max_bedrooms == max_bedrooms,
                DailyListingAggregate.snapshot_date >= cutoff,
            )
            .order_by(DailyListingAggregate.snapshot_date)
        )
        return list(session.exec(stmt).all())