2025-05-26 19:36:54 +00:00
|
|
|
import json
|
2025-06-22 21:18:52 +00:00
|
|
|
import logging
|
2025-05-26 19:36:54 +00:00
|
|
|
import pathlib
|
2026-02-01 19:13:29 +00:00
|
|
|
from typing import Any
|
2025-05-26 19:36:54 +00:00
|
|
|
|
2026-02-01 19:13:29 +00:00
|
|
|
from models.listing import QueryParameters, RentListing, BuyListing
|
2025-06-08 18:18:38 +00:00
|
|
|
from repositories.listing_repository import ListingRepository
|
2025-05-26 19:36:54 +00:00
|
|
|
|
2025-06-22 21:18:52 +00:00
|
|
|
logger = logging.getLogger("uvicorn.error")
|
|
|
|
|
|
2025-05-26 19:36:54 +00:00
|
|
|
|
2026-02-01 19:13:29 +00:00
|
|
|
def convert_row_to_geojson(row: dict[str, Any], listing_type: str = "RENT") -> dict[str, Any]:
    """Convert a projected row dict to GeoJSON Feature format.

    This function handles dict rows from stream_listings_optimized(),
    which uses column projection and returns dicts instead of model instances.

    Args:
        row: A dict with keys matching STREAMING_COLUMNS. The keys ``id``,
            ``price``, ``number_of_bedrooms``, ``last_seen``, ``longitude``
            and ``latitude`` are read unconditionally; every other key is
            optional.
        listing_type: Value emitted verbatim as the ``listing_type``
            property of the feature.

    Returns:
        A GeoJSON Feature dict with properties and geometry

    Raises:
        KeyError: If one of the unconditionally-read keys is missing.
        json.JSONDecodeError: If ``price_history_json`` or a string-valued
            ``additional_info`` is not valid JSON.
    """
    # Parse price history from JSON string, keeping only the three
    # fields that are exposed in the feature.
    price_history = []
    if row.get('price_history_json'):
        parsed = json.loads(row['price_history_json'])
        price_history = [
            {
                "first_seen": p["first_seen"],
                "last_seen": p["last_seen"],
                "price": p["price"],
            }
            for p in parsed
        ]

    sqm = row.get('square_meters')
    price = row['price']

    # Handle available_from which may be a datetime or None
    available_from_val = row.get('available_from')
    available_from_str = None
    if available_from_val is not None:
        if hasattr(available_from_val, 'isoformat'):
            available_from_str = available_from_val.isoformat()
        else:
            available_from_str = str(available_from_val)

    # Handle last_seen which should be a datetime
    last_seen_val = row['last_seen']
    if hasattr(last_seen_val, 'isoformat'):
        last_seen_str = last_seen_val.isoformat()
    else:
        last_seen_str = str(last_seen_val)

    # Extract photo URLs from additional_info (prefer high-res maxSizeUrl)
    # Rightmove API stores photos under "photos" key, but some code paths used "images"
    photos: list[str] = []
    additional_info = row.get('additional_info')
    if additional_info:
        if isinstance(additional_info, str):
            additional_info = json.loads(additional_info)
        prop = additional_info.get('property') if isinstance(additional_info, dict) else None
        if isinstance(prop, dict):
            # `or []` guards against keys that are present but null.
            images = prop.get('images') or prop.get('photos') or []
            for img in images:
                if not isinstance(img, dict):
                    continue
                # Use .get() for both keys: an entry whose maxSizeUrl is
                # null/empty and that has no "url" key must be skipped
                # rather than raising KeyError.
                photo_url = img.get('maxSizeUrl') or img.get('url')
                if photo_url:
                    photos.append(photo_url)
    # Fall back to the thumbnail so the gallery is never empty when any
    # image is known for the listing.
    if not photos and row.get('photo_thumbnail'):
        photos = [row['photo_thumbnail']]

    properties: dict[str, Any] = {
        "id": row['id'],
        "listing_type": listing_type,
        "city": "London",
        "country": "United Kingdom",
        "qm": sqm,
        # Price per square metre is only defined when both the area and
        # the price are known (and the area is non-zero).
        "qmprice": round(price / sqm, 2) if sqm and price is not None else None,
        "rooms": row['number_of_bedrooms'],
        "total_price": price,
        "url": f"https://www.rightmove.co.uk/properties/{row['id']}",
        "photo_thumbnail": row.get('photo_thumbnail'),
        "photos": photos,
        "last_seen": last_seen_str,
        "price_history": price_history,
        "agency": row.get('agency'),
        "available_from": available_from_str,
    }

    # These two keys are only emitted when the row carries a value.
    if row.get('service_charge') is not None:
        properties["service_charge"] = row['service_charge']
    if row.get('lease_left') is not None:
        properties["lease_left"] = row['lease_left']

    return {
        "type": "Feature",
        "properties": properties,
        "geometry": {
            "coordinates": [row['longitude'], row['latitude']],
            "type": "Point",
        },
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def convert_to_geojson_feature(listing: RentListing | BuyListing) -> dict[str, Any]:
    """Convert a single listing to GeoJSON Feature format.

    The ``listing_type`` property is ``"RENT"`` for a RentListing and
    ``"BUY"`` otherwise. ``service_charge`` and ``lease_left`` are only
    emitted for a BuyListing, and only when they are not None.

    Args:
        listing: A RentListing or BuyListing model instance

    Returns:
        A GeoJSON Feature dict with properties and geometry
    """
    # Safely access nested additional_info; `or {}` also covers a
    # "property" key that is present but null.
    property_info = (listing.additional_info.get("property") or {}) if listing.additional_info else {}
    listing_type = "RENT" if isinstance(listing, RentListing) else "BUY"

    # Extract photo URLs (prefer high-res maxSizeUrl)
    # Rightmove API stores photos under "photos" key, but some code paths used "images"
    images = property_info.get('images') or property_info.get('photos') or []
    photos: list[str] = []
    for img in images:
        if not isinstance(img, dict):
            continue
        # Use .get() for both keys: an entry whose maxSizeUrl is null/empty
        # and that has no "url" key must be skipped rather than raising
        # KeyError.
        photo_url = img.get('maxSizeUrl') or img.get('url')
        if photo_url:
            photos.append(photo_url)
    # Fall back to the thumbnail so the gallery is never empty when any
    # image is known for the listing.
    if not photos and listing.photo_thumbnail:
        photos = [listing.photo_thumbnail]

    properties: dict[str, Any] = {
        "id": listing.id,
        "listing_type": listing_type,
        "city": "London",
        "country": "United Kingdom",
        "qm": listing.square_meters,
        "qmprice": listing.price_per_square_meter,
        "rooms": listing.number_of_bedrooms,
        "total_price": listing.price,
        "url": listing.url,
        "photo_thumbnail": listing.photo_thumbnail,
        "photos": photos,
        "last_seen": listing.last_seen.isoformat(),
        "price_history": [item.to_dict() for item in listing.price_history],
        "agency": listing.agency,
        "available_from": property_info.get("letDateAvailable", None),
        # Per-listing trend snapshot (populated by the daily aggregator —
        # null until the aggregator has seen this listing at least once).
        "price_14d_ago": listing.price_14d_ago,
        "price_change_pct_14d": listing.price_change_pct_14d,
    }

    if isinstance(listing, BuyListing):
        if listing.service_charge is not None:
            properties["service_charge"] = listing.service_charge
        if listing.lease_left is not None:
            properties["lease_left"] = listing.lease_left

    return {
        "type": "Feature",
        "properties": properties,
        "geometry": {
            "coordinates": [
                listing.longitude,
                listing.latitude,
            ],
            "type": "Point",
        },
    }
|
|
|
|
|
|
|
|
|
|
|
2025-06-01 15:17:14 +00:00
|
|
|
async def export_immoweb(
    repository: ListingRepository,
    output_file: str | None = None,
    query_parameters: QueryParameters | None = None,
    limit: int | None = None,
):
    """Export listings as a GeoJSON FeatureCollection.

    Listings are fetched through ``repository.get_listings`` and each one
    is turned into a feature with ``convert_to_geojson_feature``.

    Args:
        repository: Source of the listings.
        output_file: Optional path. When given, the collection is written
            there as JavaScript: the JSON text prefixed with ``var data = ``.
        query_parameters: Passed through to the repository query.
        limit: Passed through to the repository query.

    Returns:
        The FeatureCollection dict (without the JavaScript prefix).
    """
    fetched = await repository.get_listings(
        query_parameters=query_parameters,
        limit=limit,
    )
    logger.info(f"Fetched {len(fetched)} listings")

    # Convert listings to GeoJSON features using the helper function
    features = list(map(convert_to_geojson_feature, fetched))
    feature_collection = {"type": "FeatureCollection", "features": features}

    # Serialise up front, even when nothing is written, so that a
    # non-serialisable value surfaces regardless of output_file.
    payload = "var data = " + json.dumps(feature_collection, indent=4)

    if not output_file:
        return feature_collection

    target = pathlib.Path(output_file)
    target.touch(exist_ok=True)
    with target.open("w") as handle:
        handle.write(payload)
    return feature_collection
|