288 lines
11 KiB
Python
288 lines
11 KiB
Python
|
|
"""Daily market-trend aggregator.
|
||
|
|
|
||
|
|
Two outputs per run:
|
||
|
|
|
||
|
|
1. Per-listing trend columns. For each row in RentListing / BuyListing we
   parse `price_history_json` and find the latest price entry whose
   `last_seen` is at or before `lookback_days` ago. That historical price
   and the percentage change from it to the current price land on
   `price_14d_ago` / `price_change_pct_14d` for the PropertyCard badge to
   render.
|
||
|
|
|
||
|
|
2. Aggregate market snapshot. For each configured (listing_type, bedroom
|
||
|
|
band) we compute median/mean/count over the CURRENT listing pool and
|
||
|
|
upsert one row in `dailylistingaggregate` keyed on today's date. The
|
||
|
|
`MarketTrendStrip` UI consumes these rows.
|
||
|
|
|
||
|
|
Both steps are idempotent — re-running on the same day refreshes the
|
||
|
|
snapshot rather than appending. Designed to fire daily ~04:00 UTC (1h
|
||
|
|
after the 03:00 RENT scrape so the data is fresh).
|
||
|
|
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import json
|
||
|
|
import logging
|
||
|
|
import time
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from statistics import mean, median
|
||
|
|
from typing import Iterable
|
||
|
|
|
||
|
|
from sqlalchemy import Engine
|
||
|
|
from sqlmodel import Session, select
|
||
|
|
|
||
|
|
from models.listing import (
|
||
|
|
BuyListing,
|
||
|
|
DailyListingAggregate,
|
||
|
|
PriceHistoryItem,
|
||
|
|
RentListing,
|
||
|
|
)
|
||
|
|
|
||
|
|
# All messages from this module go through the logger named "uvicorn".
# NOTE(review): presumably so they show up in the server's log output — confirm.
logger = logging.getLogger("uvicorn")

# Default scope: the user's daily filter (1-2 bed, both listing types).
# Each band is an inclusive (min_bedrooms, max_bedrooms) pair.
DEFAULT_BEDROOM_BANDS: tuple[tuple[int, int], ...] = ((1, 2),)
# "RENT" is aggregated from RentListing; any other value is aggregated from
# BuyListing (see `compute_aggregate_snapshot`).
DEFAULT_LISTING_TYPES: tuple[str, ...] = ("RENT", "BUY")

# Trend lookback window for the per-listing badge. Surfaces price moves
# that happened in the last fortnight (long enough for prices to actually
# settle, short enough to feel current).
DEFAULT_LOOKBACK_DAYS = 14
|
||
|
|
|
||
|
|
|
||
|
|
def _parse_history(price_history_json: str | None) -> list[PriceHistoryItem]:
|
||
|
|
if not price_history_json:
|
||
|
|
return []
|
||
|
|
try:
|
||
|
|
raw = json.loads(price_history_json)
|
||
|
|
except (ValueError, TypeError):
|
||
|
|
return []
|
||
|
|
out: list[PriceHistoryItem] = []
|
||
|
|
for item in raw:
|
||
|
|
try:
|
||
|
|
out.append(
|
||
|
|
PriceHistoryItem(
|
||
|
|
first_seen=datetime.fromisoformat(item["first_seen"]),
|
||
|
|
last_seen=datetime.fromisoformat(item["last_seen"]),
|
||
|
|
price=float(item["price"]),
|
||
|
|
)
|
||
|
|
)
|
||
|
|
except (KeyError, ValueError, TypeError):
|
||
|
|
continue
|
||
|
|
return out
|
||
|
|
|
||
|
|
|
||
|
|
def _price_at_or_before(
|
||
|
|
history: list[PriceHistoryItem], cutoff: datetime
|
||
|
|
) -> float | None:
|
||
|
|
"""Return the price of the entry whose `last_seen` is closest to (but
|
||
|
|
not after) `cutoff`. Returns None if no entry that old exists.
|
||
|
|
|
||
|
|
History is in chronological order; we scan and keep the latest match.
|
||
|
|
"""
|
||
|
|
found: float | None = None
|
||
|
|
for item in history:
|
||
|
|
if item.last_seen <= cutoff:
|
||
|
|
found = item.price
|
||
|
|
else:
|
||
|
|
break
|
||
|
|
return found
|
||
|
|
|
||
|
|
|
||
|
|
def compute_trend_for_listing(
    price_history_json: str | None,
    current_price: float | None,
    *,
    lookback_days: int = DEFAULT_LOOKBACK_DAYS,
    now: datetime | None = None,
) -> tuple[float | None, float | None]:
    """Work out one listing's trend as `(price_n_days_ago, change_pct)`.

    The reference point is `now` (current UTC time when omitted) minus
    `lookback_days`. `change_pct` is `(current - past) / past * 100`,
    rounded to 2dp: positive means the price rose, negative that it fell.

    Both values are None when the current price is missing, non-numeric or
    not positive, or when the history has no positive price that old.
    """
    # Guard clauses: without a usable current price there is no trend.
    if not isinstance(current_price, (int, float)):
        return None, None
    if current_price <= 0:
        return None, None

    reference = now or datetime.utcnow()
    cutoff = reference - timedelta(days=lookback_days)
    entries = _parse_history(price_history_json)
    past = _price_at_or_before(entries, cutoff)
    if past is None or past <= 0:
        return None, None

    delta = current_price - past
    return past, round(delta / past * 100.0, 2)
|
||
|
|
|
||
|
|
|
||
|
|
def update_per_listing_trend(
    engine: Engine,
    *,
    lookback_days: int = DEFAULT_LOOKBACK_DAYS,
    batch_size: int = 1000,
    now: datetime | None = None,
) -> dict[str, int]:
    """Walk every RentListing + BuyListing, recompute trend columns, write.

    Listings are read in pages of `batch_size` rows and each page is
    committed on its own. A row is only written when its stored
    `price_14d_ago` / `price_change_pct_14d` differ from the freshly
    computed values.

    `now` overrides the reference time (current UTC time when omitted); it
    is resolved once so every listing in the run shares the same cutoff.

    Returns `{"rent_updated": n, "buy_updated": m}` with the number of rows
    actually modified per model.
    """
    # Local import: only this function needs mapper introspection.
    from sqlalchemy import inspect as sa_inspect

    reference_now = now or datetime.utcnow()
    counts = {"rent_updated": 0, "buy_updated": 0}
    t0 = time.monotonic()
    for model_name, model in (("rent", RentListing), ("buy", BuyListing)):
        # OFFSET/LIMIT paging is only stable over a deterministic order.
        # Without ORDER BY the database may return rows in a different
        # order on each page (we commit updates in between), so listings
        # could be skipped or processed twice.
        pk_columns = sa_inspect(model).primary_key
        with Session(engine) as session:
            offset = 0
            while True:
                stmt = (
                    select(model)
                    .order_by(*pk_columns)
                    .offset(offset)
                    .limit(batch_size)
                )
                rows: list = list(session.exec(stmt).all())
                if not rows:
                    break
                for row in rows:
                    past, pct = compute_trend_for_listing(
                        row.price_history_json,
                        row.price,
                        lookback_days=lookback_days,
                        now=reference_now,
                    )
                    # Skip unchanged rows to avoid needless UPDATEs.
                    if row.price_14d_ago != past or row.price_change_pct_14d != pct:
                        row.price_14d_ago = past
                        row.price_change_pct_14d = pct
                        session.add(row)
                        counts[f"{model_name}_updated"] += 1
                session.commit()
                # A short page means the table is exhausted.
                if len(rows) < batch_size:
                    break
                offset += batch_size
    logger.info(
        "Per-listing trend updated in %.1fs: rent=%d buy=%d (lookback=%dd)",
        time.monotonic() - t0,
        counts["rent_updated"],
        counts["buy_updated"],
        lookback_days,
    )
    return counts
|
||
|
|
|
||
|
|
|
||
|
|
def _stats(values: Iterable[float]) -> dict[str, float | None]:
|
||
|
|
"""Median + mean over the valid positive entries; null for empty input."""
|
||
|
|
finite = [v for v in values if isinstance(v, (int, float)) and v > 0]
|
||
|
|
if not finite:
|
||
|
|
return {"median": None, "mean": None, "count": 0}
|
||
|
|
return {
|
||
|
|
"median": float(median(finite)),
|
||
|
|
"mean": round(float(mean(finite)), 2),
|
||
|
|
"count": len(finite),
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
def compute_aggregate_snapshot(
    engine: Engine,
    *,
    listing_types: tuple[str, ...] = DEFAULT_LISTING_TYPES,
    bedroom_bands: tuple[tuple[int, int], ...] = DEFAULT_BEDROOM_BANDS,
    snapshot_date: datetime | None = None,
) -> list[DailyListingAggregate]:
    """Compute one aggregate row per (listing_type x bedroom band) and
    upsert it onto `snapshot_date`. Returns the persisted rows.

    `snapshot_date` defaults to today's UTC midnight. `"RENT"` is computed
    from RentListing; any other listing type is computed from BuyListing.
    Each band is an inclusive `(min_bedrooms, max_bedrooms)` pair.

    On MySQL the write is an `INSERT ... ON DUPLICATE KEY UPDATE`; every
    other dialect goes through the SQLite `INSERT ... ON CONFLICT DO UPDATE`
    construct keyed on (snapshot_date, listing_type, min_bedrooms,
    max_bedrooms). Either way, re-running on the same day refreshes the row
    in place: no duplicates, no DELETE.
    """
    today = snapshot_date or datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    key_columns = ["snapshot_date", "listing_type", "min_bedrooms", "max_bedrooms"]
    # Columns refreshed when the day's row already exists.
    stat_columns = (
        "listing_count",
        "median_total_price",
        "median_qmprice",
        "mean_total_price",
        "mean_qmprice",
    )

    # Pick the dialect-specific INSERT construct once, not per band.
    use_mysql = engine.dialect.name == "mysql"
    if use_mysql:
        from sqlalchemy.dialects.mysql import insert as dialect_insert
    else:
        from sqlalchemy.dialects.sqlite import insert as dialect_insert

    written: list[DailyListingAggregate] = []
    with Session(engine) as session:
        for listing_type in listing_types:
            model = RentListing if listing_type == "RENT" else BuyListing
            for min_bed, max_bed in bedroom_bands:
                stmt = select(model.price, model.square_meters).where(
                    model.number_of_bedrooms >= min_bed,
                    model.number_of_bedrooms <= max_bed,
                )
                rows = list(session.exec(stmt).all())
                prices = [r[0] for r in rows]
                # Price per square metre needs both a price and a positive
                # area. A NULL price would otherwise raise on the division
                # and abort the whole snapshot.
                qmprices = [
                    r[0] / r[1]
                    for r in rows
                    if r[0] is not None and r[1] is not None and r[1] > 0
                ]
                price_stats = _stats(prices)
                qm_stats = _stats(qmprices)
                values = {
                    "snapshot_date": today,
                    "listing_type": listing_type,
                    "min_bedrooms": min_bed,
                    "max_bedrooms": max_bed,
                    "listing_count": price_stats["count"],
                    "median_total_price": price_stats["median"],
                    "median_qmprice": qm_stats["median"],
                    "mean_total_price": price_stats["mean"],
                    "mean_qmprice": qm_stats["mean"],
                }

                stmt_ins = dialect_insert(DailyListingAggregate).values(**values)
                if use_mysql:
                    stmt_ins = stmt_ins.on_duplicate_key_update(
                        **{col: stmt_ins.inserted[col] for col in stat_columns}
                    )
                else:
                    stmt_ins = stmt_ins.on_conflict_do_update(
                        index_elements=key_columns,
                        set_={col: stmt_ins.excluded[col] for col in stat_columns},
                    )
                session.execute(stmt_ins)
                session.commit()

                # Read the row back so callers get the persisted ORM object.
                row = session.exec(
                    select(DailyListingAggregate).where(
                        DailyListingAggregate.snapshot_date == today,
                        DailyListingAggregate.listing_type == listing_type,
                        DailyListingAggregate.min_bedrooms == min_bed,
                        DailyListingAggregate.max_bedrooms == max_bed,
                    )
                ).first()
                if row is not None:
                    written.append(row)
                logger.info(
                    "Aggregate %s %d-%d on %s: count=%s median=%s/%s mean=%s/%s",
                    listing_type, min_bed, max_bed, today.date(),
                    price_stats["count"],
                    price_stats["median"], qm_stats["median"],
                    price_stats["mean"], qm_stats["mean"],
                )
    return written
|
||
|
|
|
||
|
|
|
||
|
|
def fetch_trend_series(
    engine: Engine,
    *,
    listing_type: str,
    min_bedrooms: int,
    max_bedrooms: int,
    days: int = 30,
) -> list[DailyListingAggregate]:
    """Load the stored aggregates for one (listing type, bedroom band).

    Only rows whose `snapshot_date` is no older than `days` days before the
    current UTC time are returned, sorted by `snapshot_date` ascending. The
    result is an empty list when nothing matches.
    """
    oldest_wanted = datetime.utcnow() - timedelta(days=days)
    filters = (
        DailyListingAggregate.listing_type == listing_type,
        DailyListingAggregate.min_bedrooms == min_bedrooms,
        DailyListingAggregate.max_bedrooms == max_bedrooms,
        DailyListingAggregate.snapshot_date >= oldest_wanted,
    )
    query = (
        select(DailyListingAggregate)
        .where(*filters)
        .order_by(DailyListingAggregate.snapshot_date)
    )
    with Session(engine) as session:
        matches = session.exec(query).all()
        return list(matches)
|