wrongmove/services/market_aggregator.py

"""Daily market-trend aggregator.

Two outputs per run:

1. Per-listing trend columns. For each row in RentListing / BuyListing we
   parse `price_history_json` and find the price entry whose `last_seen` was
   closest to `lookback_days` ago. The current price and that historical
   price land on `price_14d_ago` / `price_change_pct_14d` for the
   PropertyCard badge to render.

2. Aggregate market snapshot. For each configured (listing_type, bedroom
   band) we compute median/mean/count over the CURRENT listing pool and
   upsert one row in `dailylistingaggregate` keyed on today's date. The
   `MarketTrendStrip` UI consumes these rows.

Both steps are idempotent — re-running on the same day refreshes the
snapshot rather than appending. Designed to fire daily ~04:00 UTC (1h
after the 03:00 RENT scrape so the data is fresh).
"""
from __future__ import annotations

import json
import logging
import math
import time
from datetime import datetime, timedelta
from statistics import mean, median
from typing import Iterable

from sqlalchemy import Engine
from sqlmodel import Session, select

from models.listing import (
    BuyListing,
    DailyListingAggregate,
    PriceHistoryItem,
    RentListing,
)

logger = logging.getLogger("uvicorn")

# Default scope: the user's daily filter (1-2 bed, both listing types).
DEFAULT_BEDROOM_BANDS: tuple[tuple[int, int], ...] = ((1, 2),)
DEFAULT_LISTING_TYPES: tuple[str, ...] = ("RENT", "BUY")

# Trend lookback window for the per-listing badge. Surfaces price moves
# that happened in the last fortnight (long enough for prices to actually
# settle, short enough to feel current).
DEFAULT_LOOKBACK_DAYS = 14


def _parse_history(price_history_json: str | None) -> list[PriceHistoryItem]:
    if not price_history_json:
        return []
    try:
        raw = json.loads(price_history_json)
    except (ValueError, TypeError):
        return []
    out: list[PriceHistoryItem] = []
    for item in raw:
        try:
            out.append(
                PriceHistoryItem(
                    first_seen=datetime.fromisoformat(item["first_seen"]),
                    last_seen=datetime.fromisoformat(item["last_seen"]),
                    price=float(item["price"]),
                )
            )
        except (KeyError, ValueError, TypeError):
            continue
    return out


def _price_at_or_before(
    history: list[PriceHistoryItem], cutoff: datetime
) -> float | None:
    """Return the price of the entry whose `last_seen` is closest to (but
    not after) `cutoff`. Returns None if no entry that old exists.

    History is in chronological order; we scan and keep the latest match.
    """
    found: float | None = None
    for item in history:
        if item.last_seen <= cutoff:
            found = item.price
        else:
            break
    return found


def compute_trend_for_listing(
    price_history_json: str | None,
    current_price: float | None,
    *,
    lookback_days: int = DEFAULT_LOOKBACK_DAYS,
    now: datetime | None = None,
) -> tuple[float | None, float | None]:
    """Return `(price_n_days_ago, change_pct)` for one listing.

    `change_pct` is `(current - past) / past * 100` rounded to 2dp; positive
    = price went up, negative = down. Both are None when there's no entry
    that old in history, or when either the current or the historical
    price is unusable (not a number, NaN/infinite, or not strictly
    positive).

    `now` overrides the reference time; it defaults to the current naive
    UTC time. The cutoff is `now - lookback_days`.
    """
    if (
        not isinstance(current_price, (int, float))
        or not math.isfinite(current_price)
        or current_price <= 0
    ):
        return None, None
    cutoff = (now or datetime.utcnow()) - timedelta(days=lookback_days)
    past = _price_at_or_before(_parse_history(price_history_json), cutoff)
    # NaN compares False against 0, so it needs an explicit finiteness
    # check; otherwise the percentage would be NaN, which never equals
    # itself and would make every run look like a change.
    if past is None or not math.isfinite(past) or past <= 0:
        return None, None
    pct = round((current_price - past) / past * 100.0, 2)
    return past, pct


def update_per_listing_trend(
    engine: Engine,
    *,
    lookback_days: int = DEFAULT_LOOKBACK_DAYS,
    batch_size: int = 1000,
    now: datetime | None = None,
) -> dict[str, int]:
    """Walk every RentListing + BuyListing, recompute trend columns, write.

    Rows are read in primary-key order, `batch_size` at a time, and each
    batch is committed before the next is fetched. A row is only written
    when its computed `(price_14d_ago, price_change_pct_14d)` differs from
    what is stored, so a re-run with unchanged data writes nothing.

    Returns `{"rent_updated": n, "buy_updated": m}` — the number of rows
    whose trend columns were changed for each model.
    """
    counts = {"rent_updated": 0, "buy_updated": 0}
    t0 = time.monotonic()
    # Resolve the reference time once so every listing in this run is
    # measured against the same cutoff.
    run_now = now or datetime.utcnow()
    for model_name, model in (("rent", RentListing), ("buy", BuyListing)):
        # OFFSET/LIMIT paging is only stable under a total order; without
        # ORDER BY the database may return rows in a different order per
        # page, skipping some rows and repeating others.
        pk_columns = tuple(model.__table__.primary_key.columns)
        with Session(engine) as session:
            offset = 0
            while True:
                stmt = (
                    select(model)
                    .order_by(*pk_columns)
                    .offset(offset)
                    .limit(batch_size)
                )
                rows: list = list(session.exec(stmt).all())
                if not rows:
                    break
                for row in rows:
                    past, pct = compute_trend_for_listing(
                        row.price_history_json,
                        row.price,
                        lookback_days=lookback_days,
                        now=run_now,
                    )
                    if row.price_14d_ago != past or row.price_change_pct_14d != pct:
                        row.price_14d_ago = past
                        row.price_change_pct_14d = pct
                        session.add(row)
                        counts[f"{model_name}_updated"] += 1
                session.commit()
                # A short page means the table is exhausted; skip the
                # extra empty query.
                if len(rows) < batch_size:
                    break
                offset += batch_size
    logger.info(
        "Per-listing trend updated in %.1fs: rent=%d buy=%d (lookback=%dd)",
        time.monotonic() - t0,
        counts["rent_updated"],
        counts["buy_updated"],
        lookback_days,
    )
    return counts


def _stats(values: Iterable[float]) -> dict[str, float | None]:
    """Median + mean over the valid positive entries; null for empty input."""
    finite = [v for v in values if isinstance(v, (int, float)) and v > 0]
    if not finite:
        return {"median": None, "mean": None, "count": 0}
    return {
        "median": float(median(finite)),
        "mean": round(float(mean(finite)), 2),
        "count": len(finite),
    }


def compute_aggregate_snapshot(
    engine: Engine,
    *,
    listing_types: tuple[str, ...] = DEFAULT_LISTING_TYPES,
    bedroom_bands: tuple[tuple[int, int], ...] = DEFAULT_BEDROOM_BANDS,
    snapshot_date: datetime | None = None,
) -> list[DailyListingAggregate]:
    """Compute one aggregate row per (listing_type * bedroom band) and
    upsert it onto today's `snapshot_date`. Returns the persisted rows.

    Uses a dialect-specific upsert (`INSERT ... ON DUPLICATE KEY UPDATE`
    on MySQL, `INSERT ... ON CONFLICT DO UPDATE` otherwise) so re-running
    on the same day refreshes the row in place — no duplicates, no DELETE.

    `snapshot_date` defaults to the current naive UTC time truncated to
    midnight; a caller-supplied value is used as given. `"RENT"` selects
    RentListing; any other `listing_type` value selects BuyListing.

    The returned rows are fully loaded and remain readable after this
    function returns.
    """
    today = snapshot_date or datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    written: list[DailyListingAggregate] = []
    dialect = engine.dialect.name
    # expire_on_commit=False: we commit once per band, and the default
    # would expire rows already collected in `written`. Once the session
    # closes, reading those rows would raise DetachedInstanceError.
    with Session(engine, expire_on_commit=False) as session:
        for listing_type in listing_types:
            model = RentListing if listing_type == "RENT" else BuyListing
            for min_bed, max_bed in bedroom_bands:
                stmt = select(model.price, model.square_meters).where(
                    model.number_of_bedrooms >= min_bed,
                    model.number_of_bedrooms <= max_bed,
                )
                rows = list(session.exec(stmt).all())
                prices = [r[0] for r in rows]
                # A NULL price with a known area must be skipped here:
                # `None / area` would raise before `_stats` can filter it.
                qmprices = [
                    (r[0] / r[1])
                    for r in rows
                    if r[0] is not None and r[1] is not None and r[1] > 0
                ]
                price_stats = _stats(prices)
                qm_stats = _stats(qmprices)
                values = {
                    "snapshot_date": today,
                    "listing_type": listing_type,
                    "min_bedrooms": min_bed,
                    "max_bedrooms": max_bed,
                    "listing_count": price_stats["count"],
                    "median_total_price": price_stats["median"],
                    "median_qmprice": qm_stats["median"],
                    "mean_total_price": price_stats["mean"],
                    "mean_qmprice": qm_stats["mean"],
                }
                if dialect == "mysql":
                    from sqlalchemy.dialects.mysql import insert as mysql_insert
                    stmt_ins = mysql_insert(DailyListingAggregate).values(**values)
                    stmt_ins = stmt_ins.on_duplicate_key_update(
                        listing_count=stmt_ins.inserted.listing_count,
                        median_total_price=stmt_ins.inserted.median_total_price,
                        median_qmprice=stmt_ins.inserted.median_qmprice,
                        mean_total_price=stmt_ins.inserted.mean_total_price,
                        mean_qmprice=stmt_ins.inserted.mean_qmprice,
                    )
                else:
                    from sqlalchemy.dialects.sqlite import insert as sqlite_insert
                    stmt_ins = sqlite_insert(DailyListingAggregate).values(**values)
                    stmt_ins = stmt_ins.on_conflict_do_update(
                        index_elements=[
                            "snapshot_date", "listing_type",
                            "min_bedrooms", "max_bedrooms",
                        ],
                        set_={
                            "listing_count": stmt_ins.excluded.listing_count,
                            "median_total_price": stmt_ins.excluded.median_total_price,
                            "median_qmprice": stmt_ins.excluded.median_qmprice,
                            "mean_total_price": stmt_ins.excluded.mean_total_price,
                            "mean_qmprice": stmt_ins.excluded.mean_qmprice,
                        },
                    )
                session.execute(stmt_ins)
                session.commit()
                # The upsert went through Core, so read the row back to
                # hand the caller an ORM instance.
                row = session.exec(
                    select(DailyListingAggregate).where(
                        DailyListingAggregate.snapshot_date == today,
                        DailyListingAggregate.listing_type == listing_type,
                        DailyListingAggregate.min_bedrooms == min_bed,
                        DailyListingAggregate.max_bedrooms == max_bed,
                    )
                ).first()
                if row is not None:
                    written.append(row)
                logger.info(
                    "Aggregate %s %d-%d on %s: count=%s median=%s/%s mean=%s/%s",
                    listing_type, min_bed, max_bed, today.date(),
                    price_stats["count"],
                    price_stats["median"], qm_stats["median"],
                    price_stats["mean"], qm_stats["mean"],
                )
    return written


def fetch_trend_series(
    engine: Engine,
    *,
    listing_type: str,
    min_bedrooms: int,
    max_bedrooms: int,
    days: int = 30,
) -> list[DailyListingAggregate]:
    """Load the stored aggregate series for one (listing_type, bedroom band).

    Only rows whose `snapshot_date` is no older than `days` days before
    the current naive UTC time are included, sorted by `snapshot_date`
    ascending. The result is an empty list when nothing matches.
    """
    window_start = datetime.utcnow() - timedelta(days=days)
    agg = DailyListingAggregate
    filters = (
        agg.listing_type == listing_type,
        agg.min_bedrooms == min_bedrooms,
        agg.max_bedrooms == max_bedrooms,
        agg.snapshot_date >= window_start,
    )
    query = select(agg).where(*filters).order_by(agg.snapshot_date)
    with Session(engine) as session:
        series = session.exec(query).all()
        return list(series)
wrongmove: daily price-trend monitoring (per-listing badge + macro strip) Two surfaces wired up so the user can "get a vibe of the market": Per-listing — each PropertyCard now shows a small pill next to the price when the listing's total_price moved >=1% over a 14-day lookback (e.g. "↓ £200 (-4%) in 14d"). Drops render green, rises render red. Computed from `price_history_json` by the daily aggregator and denormalised onto the listing row so the streaming endpoint just passes it through. Macro — new always-visible inline strip above the chip strip showing today's median total price, median £/m², and listing count for the current filter's bedroom band, each with a 30-day % delta: "Rent · 1-2 bed · 30d: Median £2,500 ↓ -4% · £/m² £50 ↓ -2% · Listings 4,200 ↑ +5%". Both data sources are populated daily at 04:00 UTC by a new Celery beat task that fires 1h after the 03:00 RENT scrape and feeds two sinks: a per-listing update pass and an upsert to a new `dailylistingaggregate` table keyed on (snapshot_date, listing_type, min_bedrooms, max_bedrooms). ## Backend - `models/listing.py`: Listing parent gains `price_14d_ago` + `price_change_pct_14d` nullable floats (inherited by RentListing/BuyListing). New `DailyListingAggregate` table model with unique constraint on (date, type, min_bed, max_bed). - Alembic `a8b9c0d1e2f3`: adds the two columns to both listing tables and creates the aggregate table + date index. - `services/market_aggregator.py` (new): `compute_trend_for_listing`, `update_per_listing_trend` (batched, idempotent), `_stats` (median + mean filtered to positive finite values), `compute_aggregate_snapshot` (dialect-aware MySQL / SQLite upsert), `fetch_trend_series` (range query for the API). - `tasks/market_tasks.py` (new): `compute_daily_market_aggregates_task` Celery task wrapping both stages. - `tasks/listing_tasks.py:setup_periodic_tasks`: registers the daily task at 04:00 UTC alongside the existing scrape schedules. 
- `celery_app.py`: includes the new tasks module. - `api/app.py`: new `GET /api/market_trend?listing_type=&min_bedrooms=& max_bedrooms=&days=` endpoint returning the daily series. - `ui_exporter.py`: GeoJSON feature properties now carry `price_14d_ago` and `price_change_pct_14d` so the frontend can render the badge without an extra round-trip. ## Frontend - `types/index.ts`: new `MarketTrendPoint`; `PropertyProperties` gains the two optional trend fields. - `components/PropertyCard.tsx`: derived `trendBadge` (>=1% threshold, null-safe) rendered as a small pill on both card variants. - `hooks/useMarketTrend.ts` (new): fetches the trend series, derives current-vs-oldest deltas per metric (% change rounded to 1dp). - `components/MarketTrendStrip.tsx` (new): compact inline strip with three metric cells. Hidden when the aggregator hasn't produced any rows yet (graceful start during the first week post-launch). - `App.tsx`: renders the strip above the chip strip whenever the active queryParameters are known. ## Tests - pytest: 10 new (trend math edge cases including null history, malformed JSON, only-recent entries, drops, rises, zero current price; _stats empty / nonpositive filtering; upsert idempotency on an in-memory SQLite seed). 34 decision + aggregator tests pass. - vitest: 8 new (useMarketTrend fetch URL, two-point delta, single-point null delta, empty series; PropertyCard trend badge arrow direction + sign for drops/rises, noise threshold, null guard). 229 tests pass total, tsc clean. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> 2026-05-16 12:02:25 +00:00			`"""Daily market-trend aggregator.`

			`Two outputs per run:`

			`1. Per-listing trend columns. For each row in RentListing / BuyListing we`
			parse `price_history_json` and find the price entry whose `last_seen` was
			closest to `lookback_days` ago. The current price and that historical
			price land on `price_14d_ago` / `price_change_pct_14d` for the
			`PropertyCard badge to render.`

			`2. Aggregate market snapshot. For each configured (listing_type, bedroom`
			`band) we compute median/mean/count over the CURRENT listing pool and`
			upsert one row in `dailylistingaggregate` keyed on today's date. The
			`MarketTrendStrip` UI consumes these rows.

			`Both steps are idempotent — re-running on the same day refreshes the`
			`snapshot rather than appending. Designed to fire daily ~04:00 UTC (1h`
			`after the 03:00 RENT scrape so the data is fresh).`
			`"""`
			`from __future__ import annotations`

			`import json`
			`import logging`
			`import time`
			`from datetime import datetime, timedelta`
			`from statistics import mean, median`
			`from typing import Iterable`

			`from sqlalchemy import Engine`
			`from sqlmodel import Session, select`

			`from models.listing import (`
			`BuyListing,`
			`DailyListingAggregate,`
			`PriceHistoryItem,`
			`RentListing,`
			`)`

			`logger = logging.getLogger("uvicorn")`

			`# Default scope: the user's daily filter (1-2 bed, both listing types).`
			`DEFAULT_BEDROOM_BANDS: tuple[tuple[int, int], ...] = ((1, 2),)`
			`DEFAULT_LISTING_TYPES: tuple[str, ...] = ("RENT", "BUY")`

			`# Trend lookback window for the per-listing badge. Surfaces price moves`
			`# that happened in the last fortnight (long enough for prices to actually`
			`# settle, short enough to feel current).`
			`DEFAULT_LOOKBACK_DAYS = 14`


			`def _parse_history(price_history_json: str \| None) -> list[PriceHistoryItem]:`
			`if not price_history_json:`
			`return []`
			`try:`
			`raw = json.loads(price_history_json)`
			`except (ValueError, TypeError):`
			`return []`
			`out: list[PriceHistoryItem] = []`
			`for item in raw:`
			`try:`
			`out.append(`
			`PriceHistoryItem(`
			`first_seen=datetime.fromisoformat(item["first_seen"]),`
			`last_seen=datetime.fromisoformat(item["last_seen"]),`
			`price=float(item["price"]),`
			`)`
			`)`
			`except (KeyError, ValueError, TypeError):`
			`continue`
			`return out`


			`def _price_at_or_before(`
			`history: list[PriceHistoryItem], cutoff: datetime`
			`) -> float \| None:`
			"""Return the price of the entry whose `last_seen` is closest to (but
			not after) `cutoff`. Returns None if no entry that old exists.

			`History is in chronological order; we scan and keep the latest match.`
			`"""`
			`found: float \| None = None`
			`for item in history:`
			`if item.last_seen <= cutoff:`
			`found = item.price`
			`else:`
			`break`
			`return found`


			`def compute_trend_for_listing(`
			`price_history_json: str \| None,`
			`current_price: float \| None,`
			`*,`
			`lookback_days: int = DEFAULT_LOOKBACK_DAYS,`
			`now: datetime \| None = None,`
			`) -> tuple[float \| None, float \| None]:`
			"""Return `(price_n_days_ago, change_pct)` for one listing.

			`change_pct` is `(current - past) / past * 100` rounded to 2dp; positive
			`= price went up, negative = down. Both are None when there's no entry`
			`that old in history or current price is unusable.`
			`"""`
			`if not isinstance(current_price, (int, float)) or current_price <= 0:`
			`return None, None`
			`cutoff = (now or datetime.utcnow()) - timedelta(days=lookback_days)`
			`history = _parse_history(price_history_json)`
			`past = _price_at_or_before(history, cutoff)`
			`if past is None or past <= 0:`
			`return None, None`
			`pct = round((current_price - past) / past * 100.0, 2)`
			`return past, pct`


			`def update_per_listing_trend(`
			`engine: Engine,`
			`*,`
			`lookback_days: int = DEFAULT_LOOKBACK_DAYS,`
			`batch_size: int = 1000,`
			`now: datetime \| None = None,`
			`) -> dict[str, int]:`
			`"""Walk every RentListing + BuyListing, recompute trend columns, write."""`
			`counts = {"rent_updated": 0, "buy_updated": 0}`
			`t0 = time.monotonic()`
			`for model_name, model in (("rent", RentListing), ("buy", BuyListing)):`
			`with Session(engine) as session:`
			`offset = 0`
			`while True:`
			`stmt = select(model).offset(offset).limit(batch_size)`
			`rows: list = list(session.exec(stmt).all())`
			`if not rows:`
			`break`
			`for row in rows:`
			`past, pct = compute_trend_for_listing(`
			`row.price_history_json,`
			`row.price,`
			`lookback_days=lookback_days,`
			`now=now,`
			`)`
			`if row.price_14d_ago != past or row.price_change_pct_14d != pct:`
			`row.price_14d_ago = past`
			`row.price_change_pct_14d = pct`
			`session.add(row)`
			`counts[f"{model_name}_updated"] += 1`
			`session.commit()`
			`if len(rows) < batch_size:`
			`break`
			`offset += batch_size`
			`logger.info(`
			`"Per-listing trend updated in %.1fs: rent=%d buy=%d (lookback=%dd)",`
			`time.monotonic() - t0,`
			`counts["rent_updated"],`
			`counts["buy_updated"],`
			`lookback_days,`
			`)`
			`return counts`


			`def _stats(values: Iterable[float]) -> dict[str, float \| None]:`
			`"""Median + mean over the valid positive entries; null for empty input."""`
			`finite = [v for v in values if isinstance(v, (int, float)) and v > 0]`
			`if not finite:`
			`return {"median": None, "mean": None, "count": 0}`
			`return {`
			`"median": float(median(finite)),`
			`"mean": round(float(mean(finite)), 2),`
			`"count": len(finite),`
			`}`


			`def compute_aggregate_snapshot(`
			`engine: Engine,`
			`*,`
			`listing_types: tuple[str, ...] = DEFAULT_LISTING_TYPES,`
			`bedroom_bands: tuple[tuple[int, int], ...] = DEFAULT_BEDROOM_BANDS,`
			`snapshot_date: datetime \| None = None,`
			`) -> list[DailyListingAggregate]:`
			`"""Compute one aggregate row per (listing_type * bedroom band) and`
			upsert it onto today's `snapshot_date`. Returns the persisted rows.

			Uses an `INSERT ... ON DUPLICATE KEY UPDATE` so re-running on the same
			`day refreshes the row in place — no duplicates, no DELETE.`
			`"""`
			`today = snapshot_date or datetime.utcnow().replace(`
			`hour=0, minute=0, second=0, microsecond=0`
			`)`
			`written: list[DailyListingAggregate] = []`
			`dialect = engine.dialect.name`
			`with Session(engine) as session:`
			`for listing_type in listing_types:`
			`model = RentListing if listing_type == "RENT" else BuyListing`
			`for min_bed, max_bed in bedroom_bands:`
			`stmt = select(model.price, model.square_meters).where(`
			`model.number_of_bedrooms >= min_bed,`
			`model.number_of_bedrooms <= max_bed,`
			`)`
			`rows = list(session.exec(stmt).all())`
			`prices = [r[0] for r in rows]`
			`qmprices = [`
			`(r[0] / r[1])`
			`for r in rows`
			`if r[1] is not None and r[1] > 0`
			`]`
			`price_stats = _stats(prices)`
			`qm_stats = _stats(qmprices)`
			`values = {`
			`"snapshot_date": today,`
			`"listing_type": listing_type,`
			`"min_bedrooms": min_bed,`
			`"max_bedrooms": max_bed,`
			`"listing_count": price_stats["count"],`
			`"median_total_price": price_stats["median"],`
			`"median_qmprice": qm_stats["median"],`
			`"mean_total_price": price_stats["mean"],`
			`"mean_qmprice": qm_stats["mean"],`
			`}`
			`if dialect == "mysql":`
			`from sqlalchemy.dialects.mysql import insert as mysql_insert`
			`stmt_ins = mysql_insert(DailyListingAggregate).values(**values)`
			`stmt_ins = stmt_ins.on_duplicate_key_update(`
			`listing_count=stmt_ins.inserted.listing_count,`
			`median_total_price=stmt_ins.inserted.median_total_price,`
			`median_qmprice=stmt_ins.inserted.median_qmprice,`
			`mean_total_price=stmt_ins.inserted.mean_total_price,`
			`mean_qmprice=stmt_ins.inserted.mean_qmprice,`
			`)`
			`session.execute(stmt_ins)`
			`else:`
			`from sqlalchemy.dialects.sqlite import insert as sqlite_insert`
			`stmt_ins = sqlite_insert(DailyListingAggregate).values(**values)`
			`stmt_ins = stmt_ins.on_conflict_do_update(`
			`index_elements=[`
			`"snapshot_date", "listing_type",`
			`"min_bedrooms", "max_bedrooms",`
			`],`
			`set_={`
			`"listing_count": stmt_ins.excluded.listing_count,`
			`"median_total_price": stmt_ins.excluded.median_total_price,`
			`"median_qmprice": stmt_ins.excluded.median_qmprice,`
			`"mean_total_price": stmt_ins.excluded.mean_total_price,`
			`"mean_qmprice": stmt_ins.excluded.mean_qmprice,`
			`},`
			`)`
			`session.execute(stmt_ins)`
			`session.commit()`
			`row = session.exec(`
			`select(DailyListingAggregate).where(`
			`DailyListingAggregate.snapshot_date == today,`
			`DailyListingAggregate.listing_type == listing_type,`
			`DailyListingAggregate.min_bedrooms == min_bed,`
			`DailyListingAggregate.max_bedrooms == max_bed,`
			`)`
			`).first()`
			`if row is not None:`
			`written.append(row)`
			`logger.info(`
			`"Aggregate %s %d-%d on %s: count=%s median=%s/%s mean=%s/%s",`
			`listing_type, min_bed, max_bed, today.date(),`
			`price_stats["count"],`
			`price_stats["median"], qm_stats["median"],`
			`price_stats["mean"], qm_stats["mean"],`
			`)`
			`return written`


			`def fetch_trend_series(`
			`engine: Engine,`
			`*,`
			`listing_type: str,`
			`min_bedrooms: int,`
			`max_bedrooms: int,`
			`days: int = 30,`
			`) -> list[DailyListingAggregate]:`
			"""Return the aggregate rows for the last `days` days, ordered ascending
			`by date. Empty list when no rows match — the strip handles that case."""`
			`cutoff = datetime.utcnow() - timedelta(days=days)`
			`with Session(engine) as session:`
			`stmt = (`
			`select(DailyListingAggregate)`
			`.where(`
			`DailyListingAggregate.listing_type == listing_type,`
			`DailyListingAggregate.min_bedrooms == min_bedrooms,`
			`DailyListingAggregate.max_bedrooms == max_bedrooms,`
			`DailyListingAggregate.snapshot_date >= cutoff,`
			`)`
			`.order_by(DailyListingAggregate.snapshot_date)`
			`)`
			`return list(session.exec(stmt).all())`