2026-02-01 17:28:37 +00:00
|
|
|
"""FastAPI application for the Real Estate Crawler API."""
|
2025-06-23 19:43:54 +00:00
|
|
|
from datetime import datetime, timedelta
|
2025-06-22 21:18:52 +00:00
|
|
|
import json
|
2025-06-22 14:00:47 +00:00
|
|
|
import logging
|
|
|
|
|
import logging.config
|
2026-02-07 20:19:57 +00:00
|
|
|
from typing import Annotated, AsyncGenerator, Optional
|
2025-06-14 13:39:37 +00:00
|
|
|
from api.auth import get_current_user
|
2026-02-08 19:42:30 +00:00
|
|
|
from api.config import DEV_TIER_ORIGINS, PROD_TIER_ORIGINS, APP_ENV
|
2026-02-21 13:54:38 +00:00
|
|
|
from api.decision_routes import decision_router
|
2026-02-21 15:48:02 +00:00
|
|
|
from api.passkey_routes import passkey_router
|
2026-02-08 13:14:47 +00:00
|
|
|
from api.poi_routes import poi_router
|
2026-02-09 21:17:30 +00:00
|
|
|
from api.ws_routes import ws_router
|
2026-02-08 00:45:43 +00:00
|
|
|
from api.rate_limit_config import RateLimitConfig
|
|
|
|
|
from api.rate_limiter import RateLimitMiddleware
|
|
|
|
|
from api.audit_middleware import AuditLogMiddleware
|
|
|
|
|
from api.metrics_guard import MetricsGuardMiddleware
|
2026-02-08 19:42:30 +00:00
|
|
|
from api.security_headers import SecurityHeadersMiddleware
|
2026-02-08 20:06:46 +00:00
|
|
|
from api.origin_validator import OriginValidatorMiddleware
|
2025-06-22 21:18:52 +00:00
|
|
|
from dotenv import load_dotenv
|
2026-02-08 19:42:30 +00:00
|
|
|
from fastapi import Depends, FastAPI, HTTPException, Query
|
2026-02-08 20:06:46 +00:00
|
|
|
from fastapi.responses import JSONResponse, StreamingResponse
|
2026-02-21 15:48:02 +00:00
|
|
|
from pydantic import BaseModel
|
2026-02-08 20:06:46 +00:00
|
|
|
from starlette.requests import Request
|
2025-06-14 13:39:37 +00:00
|
|
|
from api.auth import User
|
2026-02-01 17:28:37 +00:00
|
|
|
from models.listing import QueryParameters, ListingType, FurnishType
|
2025-07-25 21:32:06 +00:00
|
|
|
from notifications import send_notification
|
2025-06-11 20:05:26 +00:00
|
|
|
from repositories.listing_repository import ListingRepository
|
|
|
|
|
from database import engine
|
2025-06-14 13:39:37 +00:00
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
2026-02-01 17:28:37 +00:00
|
|
|
from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson
|
2025-06-22 21:18:52 +00:00
|
|
|
|
2026-02-21 15:48:02 +00:00
|
|
|
from services import listing_service, export_service, district_service, task_service
|
|
|
|
|
from services import decision_service
|
2026-02-06 20:34:50 +00:00
|
|
|
from services.listing_cache import (
|
|
|
|
|
get_cached_count,
|
|
|
|
|
get_cached_features,
|
2026-02-09 21:17:30 +00:00
|
|
|
begin_cache_population,
|
|
|
|
|
cache_features_batch_staged,
|
|
|
|
|
finalize_cache_population,
|
|
|
|
|
delete_staging_key,
|
2026-02-06 20:34:50 +00:00
|
|
|
)
|
2026-02-08 13:14:47 +00:00
|
|
|
from repositories.poi_repository import POIRepository
|
2026-02-21 15:48:02 +00:00
|
|
|
from repositories.decision_repository import DecisionRepository
|
2026-02-08 13:14:47 +00:00
|
|
|
from repositories.user_repository import UserRepository
|
2025-10-18 09:58:55 +00:00
|
|
|
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
2026-02-14 11:21:49 +00:00
|
|
|
from api.metrics import init_metrics, get_metrics_asgi_app
|
|
|
|
|
import api.metrics as app_metrics
|
2026-02-14 10:59:12 +00:00
|
|
|
from logging_config import configure_logging
|
2025-10-18 09:58:55 +00:00
|
|
|
|
2025-06-11 20:05:26 +00:00
|
|
|
|
2025-06-22 21:18:52 +00:00
|
|
|
# Environment variables are loaded first so that the configuration objects
# created below can read them.
load_dotenv()

configure_logging("api")
logger = logging.getLogger(__name__)

# Default number of features per streamed batch when the client gives none.
DEFAULT_BATCH_SIZE = 50

# Caps and limits used by the endpoints and the rate-limit middleware.
_rate_limit_config = RateLimitConfig.from_env()
|
2026-02-07 20:19:57 +00:00
|
|
|
|
2025-06-21 21:52:51 +00:00
|
|
|
|
2026-02-01 17:28:37 +00:00
|
|
|
def get_query_parameters(
    listing_type: ListingType,
    min_bedrooms: int = 1,
    max_bedrooms: int = 999,
    min_price: int = 0,
    max_price: int = 10_000_000,
    min_sqm: Optional[int] = None,
    max_sqm: Optional[int] = None,
    min_price_per_sqm: Optional[float] = None,
    max_price_per_sqm: Optional[float] = None,
    last_seen_days: Optional[int] = None,
    let_date_available_from: Optional[datetime] = None,
    furnish_types: Optional[str] = None,  # comma-separated list
    district_names: Optional[str] = None,  # comma-separated list
) -> QueryParameters:
    """Parse raw query-string values into a ``QueryParameters`` model.

    ``furnish_types`` and ``district_names`` arrive as comma-separated
    strings. Surrounding whitespace is stripped and empty entries (for
    example from a trailing comma) are ignored for both.

    Raises:
        HTTPException: 422 when ``furnish_types`` contains a value that is
            not a valid ``FurnishType``.
    """
    parsed_furnish_types = None
    if furnish_types:
        tokens = [token.strip() for token in furnish_types.split(",")]
        try:
            converted = [FurnishType(token) for token in tokens if token]
        except ValueError as exc:
            # A bad filter value is a client error, not a server fault.
            raise HTTPException(
                status_code=422,
                detail=f"Invalid furnish_types: {exc}",
            ) from exc
        # Only blanks were supplied: treat it as "no furnish filter".
        parsed_furnish_types = converted or None

    parsed_district_names: set[str] = set()
    if district_names:
        parsed_district_names = {
            name.strip() for name in district_names.split(",") if name.strip()
        }

    return QueryParameters(
        listing_type=listing_type,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
        min_sqm=min_sqm,
        max_sqm=max_sqm,
        min_price_per_sqm=min_price_per_sqm,
        max_price_per_sqm=max_price_per_sqm,
        last_seen_days=last_seen_days,
        let_date_available_from=let_date_available_from,
        furnish_types=parsed_furnish_types,
        district_names=parsed_district_names,
    )
|
2025-06-21 21:52:51 +00:00
|
|
|
|
|
|
|
|
|
2026-02-08 20:06:46 +00:00
|
|
|
# The interactive docs and the OpenAPI schema are disabled in production.
app = FastAPI(
    docs_url="/docs" if APP_ENV != "production" else None,
    redoc_url="/redoc" if APP_ENV != "production" else None,
    openapi_url="/openapi.json" if APP_ENV != "production" else None,
)

# Feature routers defined in sibling modules.
app.include_router(passkey_router)
app.include_router(poi_router)
app.include_router(decision_router)
app.include_router(ws_router)

# Metrics are initialised before the metrics ASGI app is mounted.
init_metrics("realestate-crawler-api")
app.mount("/metrics", get_metrics_asgi_app())
|
2025-06-11 21:08:11 +00:00
|
|
|
|
2025-06-21 12:49:04 +00:00
|
|
|
|
2025-06-14 13:39:37 +00:00
|
|
|
# CORS for the React frontend: every dev-tier and prod-tier origin is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[*DEV_TIER_ORIGINS, *PROD_TIER_ORIGINS],
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["Authorization", "Content-Type"],
)

# Origin validation uses the same origin list as CORS.
app.add_middleware(
    OriginValidatorMiddleware,
    allowed_origins=[*DEV_TIER_ORIGINS, *PROD_TIER_ORIGINS],
)

# Security middleware. Registration order matters: middleware is added
# bottom-to-top, so the one added last is the outermost layer.
# 3. Rate limiting: enforces per-user limits.
app.add_middleware(RateLimitMiddleware, config=_rate_limit_config)
# 2. Metrics guard: blocks unauthorized /metrics access.
app.add_middleware(MetricsGuardMiddleware, config=_rate_limit_config)
# 1. Audit logging: logs everything, including 429s and 403s.
app.add_middleware(AuditLogMiddleware)
# 0. Security headers: adds standard security headers to all responses.
app.add_middleware(SecurityHeadersMiddleware)
|
2026-02-08 00:45:43 +00:00
|
|
|
|
2025-06-11 20:05:26 +00:00
|
|
|
|
2026-02-08 20:06:46 +00:00
|
|
|
@app.exception_handler(Exception)
async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Log any otherwise-unhandled exception and reply with a generic 500.

    The response body never includes details of the exception.
    """
    logger.exception("Unhandled exception")
    payload = {"detail": "Internal server error"}
    return JSONResponse(status_code=500, content=payload)
|
|
|
|
|
|
|
|
|
|
|
2025-07-26 13:06:28 +00:00
|
|
|
@app.get("/api/status")
async def get_status() -> dict[str, str | None]:
    """Report service status and when the listings were last updated.

    ``last_updated`` is an ISO-8601 string, or None when the repository
    reports no update time.
    """
    last_updated = ListingRepository(engine).get_last_updated()
    if last_updated:
        last_updated_iso = last_updated.isoformat()
    else:
        last_updated_iso = None
    return {"status": "OK", "last_updated": last_updated_iso}
|
2025-07-26 13:06:28 +00:00
|
|
|
|
|
|
|
|
|
2025-06-14 15:36:38 +00:00
|
|
|
@app.get("/api/listing")
async def get_listing(
    user: Annotated[User, Depends(get_current_user)],
    limit: int = 5,
) -> dict[str, list]:
    """Return up to ``limit`` listings, capped by the configured listing limit."""
    capped_limit = min(limit, _rate_limit_config.listing_limit_cap)
    result = await listing_service.get_listings(
        ListingRepository(engine),
        limit=capped_limit,
    )
    logger.info(f"Fetched {result.total_count} listings for {user.email}")
    return {"listings": result.listings}
|
2025-06-15 12:42:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/api/listing_geojson")
async def get_listing_geojson(
    user: Annotated[User, Depends(get_current_user)],
    query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
    limit: int | None = None,
) -> dict:
    """Return the listings matching the query as a GeoJSON document.

    When ``limit`` is omitted the configured GeoJSON cap is used; an explicit
    ``limit`` is reduced to that cap if it exceeds it.
    """
    cap = _rate_limit_config.geojson_limit_cap
    effective_limit = cap if limit is None else min(limit, cap)

    export = await export_service.export_to_geojson(
        ListingRepository(engine),
        query_parameters=query_parameters,
        limit=effective_limit,
    )
    return export.data
|
|
|
|
|
|
|
|
|
|
|
2026-02-07 20:19:57 +00:00
|
|
|
|
Refactor backend for cleaner error handling, DRY, and type safety
- Extract rate limiter DRY: consolidate 3 duplicated check/respond paths
into _check_counter and _enforce_limit helpers, add proper type annotations
- Replace bare Exception raises with FloorplanDownloadError and
RightmoveApiError; narrow catch clauses to specific exception types;
fix Step base class to inherit from ABC
- Consolidate MAX_OCR_WORKERS into config/scraper_config.py; extract
_find_tenure_value helper to deduplicate tenure parsing
- Extract _build_poi_distances_lookup from stream endpoint to reduce nesting
- Fix csv_exporter: optional decisions.json, NaN instead of -1 sentinels,
guard against division by zero on missing square meters
- Fix notifications.py broken list[Surface]() constructor, database.py
stale comments and missing type annotation, auth.py type:ignore,
ui_exporter.py stale TODO
- Fix 3 pre-existing test failures: mock cache layer in streaming tests,
bypass rate limiter for test isolation, fix cache invalidation test to
account for two-pattern scan loop
2026-02-10 22:19:24 +00:00
|
|
|
def _build_poi_distances_lookup(
    user_email: str,
    listing_type: ListingType,
) -> dict[int, list[dict[str, str | int]]] | None:
    """Group a user's POI distance records by listing ID.

    Returns None when the user cannot be resolved, has no POIs, or there are
    no listing IDs for ``listing_type``. Otherwise returns a mapping from
    listing ID to the list of distance entries for that listing.
    """
    db_user = UserRepository(engine).get_user_by_email(user_email)
    if not db_user or db_user.id is None:
        return None

    poi_repo = POIRepository(engine)
    pois_by_id = {poi.id: poi for poi in poi_repo.get_pois_for_user(db_user.id)}
    if not pois_by_id:
        return None

    listing_ids = list(ListingRepository(engine).get_listing_ids(listing_type))
    if not listing_ids:
        return None

    grouped: dict[int, list[dict[str, str | int]]] = {}
    for record in poi_repo.get_distances_for_listings(listing_ids, listing_type, db_user.id):
        # A distance row may reference a POI that is not in the user's list.
        if record.poi_id in pois_by_id:
            poi_name = pois_by_id[record.poi_id].name
        else:
            poi_name = "Unknown"

        entry = {
            "poi_id": record.poi_id,
            "poi_name": poi_name,
            "travel_mode": record.travel_mode,
            "duration_seconds": record.duration_seconds,
            "distance_meters": record.distance_meters,
        }
        grouped.setdefault(record.listing_id, []).append(entry)
    return grouped
|
|
|
|
|
|
|
|
|
|
|
2026-02-21 15:48:02 +00:00
|
|
|
def _get_user_id_safe(user_email: str) -> int | None:
    """Return the database user ID for ``user_email``, or None.

    None is returned both when no such user (or no ID) exists and when the
    lookup itself fails. The lookup is best-effort: a failure is logged with
    its traceback instead of being raised, so callers can continue without
    per-user data.
    """
    try:
        db_user = UserRepository(engine).get_user_by_email(user_email)
        if db_user is None or db_user.id is None:
            return None
        return db_user.id
    except Exception:
        # Keep the best-effort contract, but make the failure visible:
        # callers silently skip per-user filtering when this returns None.
        logger.warning("Could not look up user ID for %s", user_email, exc_info=True)
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _should_include(
|
|
|
|
|
feature_id: int,
|
|
|
|
|
decision_filter: str,
|
|
|
|
|
disliked_ids: set[int] | None,
|
|
|
|
|
liked_ids: set[int] | None,
|
|
|
|
|
) -> bool:
|
|
|
|
|
"""Determine if a listing should be included based on decision filter."""
|
|
|
|
|
if decision_filter == "everything":
|
|
|
|
|
return True
|
|
|
|
|
if decision_filter == "liked":
|
|
|
|
|
return liked_ids is not None and feature_id in liked_ids
|
|
|
|
|
# default "all": hide disliked
|
|
|
|
|
return disliked_ids is None or feature_id not in disliked_ids
|
|
|
|
|
|
|
|
|
|
|
2026-02-07 20:19:57 +00:00
|
|
|
async def _stream_from_cache(
    query_parameters: QueryParameters,
    batch_size: int,
    limit: int | None,
    disliked_ids: set[int] | None = None,
    liked_ids: set[int] | None = None,
    decision_filter: str = "all",
) -> AsyncGenerator[str, None]:
    """Yield NDJSON lines built from cached GeoJSON features (cache-hit path).

    Emits one "metadata" line, then a "batch" line for each non-empty batch,
    then a "complete" line carrying the number of features emitted. A falsy
    ``limit`` means no limit is applied.
    """
    cached_count = get_cached_count(query_parameters)
    if limit and cached_count:
        total_expected = min(limit, cached_count)
    else:
        total_expected = cached_count

    header = {
        "type": "metadata",
        "batch_size": batch_size,
        "total_expected": total_expected,
        "cached": True,
    }
    yield json.dumps(header) + "\n"

    filtering = decision_filter != "everything"
    emitted = 0
    for cached_batch in get_cached_features(query_parameters, batch_size=batch_size):
        features = cached_batch
        if filtering:
            # Features without an id are checked as id 0.
            features = [
                feature
                for feature in cached_batch
                if _should_include(
                    feature.get("properties", {}).get("id", 0),
                    decision_filter,
                    disliked_ids,
                    liked_ids,
                )
            ]

        # Trim the batch that would overshoot the limit.
        if limit and len(features) > limit - emitted:
            features = features[:limit - emitted]

        emitted += len(features)
        if features:
            yield json.dumps({"type": "batch", "features": features}) + "\n"

        if limit and emitted >= limit:
            break

    yield json.dumps({"type": "complete", "total": emitted}) + "\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _stream_from_db(
    query_parameters: QueryParameters,
    batch_size: int,
    limit: int | None,
    poi_distances_lookup: dict[int, list[dict[str, str | int]]] | None = None,
    skip_cache: bool = False,
    disliked_ids: set[int] | None = None,
    liked_ids: set[int] | None = None,
    decision_filter: str = "all",
) -> AsyncGenerator[str, None]:
    """Stream GeoJSON features from the database, populating the cache as we go.

    Emits one "metadata" line, then "batch" lines of up to ``batch_size``
    features, then a "complete" line with the number of features streamed.

    Features excluded by the decision filter are not streamed but are still
    written to the staged cache. The staged data is promoted to the live
    cache only after the whole result set has been read; if the stream ends
    early the staging key is deleted instead.

    The cache is not populated when ``skip_cache`` is true, nor when a
    caller-supplied ``limit`` below the default stream cap truncates the
    result set (see the comment in the body).
    """
    repository = ListingRepository(engine)

    total = repository.count_listings(query_parameters)
    effective_total = min(limit, total) if limit else total

    yield json.dumps({
        "type": "metadata",
        "batch_size": batch_size,
        "total_expected": effective_total,
        "cached": False,
    }) + "\n"

    # None of the cache helpers receive ``limit``, so a cached entry is shared
    # by every request with the same query parameters. If a short, truncated
    # stream were promoted, later requests would be served that short list.
    # Streams truncated only by the default cap are still cached, as before.
    default_limit = _rate_limit_config.geojson_stream_limit_cap
    truncated_by_caller = bool(limit) and limit < default_limit and total > limit

    staging_key: str | None = None
    if not skip_cache and not truncated_by_caller:
        staging_key = begin_cache_population(query_parameters)

    try:
        count = 0
        batch: list[dict] = []
        for row in repository.stream_listings_optimized(
            query_parameters, limit=limit, page_size=batch_size
        ):
            feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
            # Inject POI distances if available
            if poi_distances_lookup and row['id'] in poi_distances_lookup:
                feature['properties']['poi_distances'] = poi_distances_lookup[row['id']]

            # Apply decision filtering
            if not _should_include(row['id'], decision_filter, disliked_ids, liked_ids):
                # Still cache the feature (it's valid data), just don't stream it
                if staging_key:
                    cache_features_batch_staged(staging_key, [feature])
                continue

            batch.append(feature)
            count += 1

            if len(batch) >= batch_size:
                if staging_key:
                    cache_features_batch_staged(staging_key, batch)
                yield json.dumps({"type": "batch", "features": batch}) + "\n"
                batch = []

        # Flush the final, partially filled batch.
        if batch:
            if staging_key:
                cache_features_batch_staged(staging_key, batch)
            yield json.dumps({"type": "batch", "features": batch}) + "\n"

        # Atomically promote staged data to live cache
        if staging_key:
            finalize_cache_population(staging_key, query_parameters)
            staging_key = None  # Mark as finalized

        yield json.dumps({"type": "complete", "total": count}) + "\n"
    finally:
        # Clean up orphaned staging key on failure
        if staging_key:
            delete_staging_key(staging_key)
|
2026-02-07 20:19:57 +00:00
|
|
|
|
|
|
|
|
|
2026-02-01 17:28:37 +00:00
|
|
|
@app.get("/api/listing_geojson/stream")
async def stream_listing_geojson(
    user: Annotated[User, Depends(get_current_user)],
    query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
    batch_size: int = DEFAULT_BATCH_SIZE,
    limit: int | None = None,
    include_poi_distances: bool = False,
    decision_filter: str = "all",
) -> StreamingResponse:
    """Stream listings as NDJSON for progressive map loading.

    Returns newline-delimited JSON with three message types:
    - metadata: Initial message with batch_size and total_expected count
    - batch: Array of GeoJSON features
    - complete: Final message with total count

    Decision filter options:
    - "all" (default): Show all listings except disliked ones
    - "liked": Show only liked listings
    - "everything": Show all listings including disliked

    ``batch_size`` is clamped to at least 1 and at most the configured batch
    cap. ``limit`` is clamped to the configured stream cap; a missing or
    non-positive ``limit`` falls back to that cap.
    """
    # A zero or negative batch size would make the batching loops degenerate.
    batch_size = max(1, min(batch_size, _rate_limit_config.geojson_stream_batch_size_cap))

    # The stream helpers treat a falsy limit as "no limit", so limit=0 must
    # not reach them: it would bypass the cap on the cache path.
    stream_cap = _rate_limit_config.geojson_stream_limit_cap
    if limit is None or limit <= 0:
        limit = stream_cap
    else:
        limit = min(limit, stream_cap)

    # Build POI distances lookup if requested
    poi_distances_lookup = (
        _build_poi_distances_lookup(user.email, query_parameters.listing_type)
        if include_poi_distances
        else None
    )

    # Build decision filter sets; only the set needed by the filter is loaded.
    disliked_ids: set[int] | None = None
    liked_ids: set[int] | None = None
    if decision_filter != "everything":
        user_id = _get_user_id_safe(user.email)
        if user_id is not None:
            decision_repo = DecisionRepository(engine)
            listing_type_str = query_parameters.listing_type.value
            if decision_filter == "liked":
                liked_ids = decision_service.get_liked_ids(
                    decision_repo, user_id, listing_type_str
                )
            else:
                # default "all": load disliked to exclude
                disliked_ids = decision_service.get_disliked_ids(
                    decision_repo, user_id, listing_type_str
                )

    # Requests with POI distances always go to the database and skip the cache.
    cached_count = get_cached_count(query_parameters)
    if cached_count is not None and cached_count > 0 and not include_poi_distances:
        app_metrics.geojson_cache_operations.add(1, {"result": "hit"})
        generator = _stream_from_cache(
            query_parameters, batch_size, limit,
            disliked_ids=disliked_ids,
            liked_ids=liked_ids,
            decision_filter=decision_filter,
        )
    else:
        app_metrics.geojson_cache_operations.add(1, {"result": "miss"})
        generator = _stream_from_db(
            query_parameters, batch_size, limit, poi_distances_lookup,
            skip_cache=include_poi_distances,
            disliked_ids=disliked_ids,
            liked_ids=liked_ids,
            decision_filter=decision_filter,
        )

    return StreamingResponse(
        generator,
        media_type="application/x-ndjson",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
        },
    )
|
2025-06-21 12:49:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/refresh_listings")
async def refresh_listings(
    user: Annotated[User, Depends(get_current_user)],
    query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
) -> dict[str, str]:
    """Start a listing refresh and return its task ID and message.

    A notification naming the user and the query parameters is sent first.
    If the service returns a task ID it is recorded against the user;
    otherwise ``task_id`` in the response is an empty string.
    """
    notification_text = (
        f"{user.email} refreshing listings with query parameters {query_parameters.model_dump_json()}"
    )
    await send_notification(notification_text)

    outcome = await listing_service.refresh_listings(
        ListingRepository(engine),
        query_parameters,
        async_mode=True,
        user_email=user.email,
    )

    # Track task for user
    task_id = outcome.task_id
    if task_id:
        task_service.add_task_for_user(user.email, task_id)

    return {"task_id": task_id or "", "message": outcome.message}
|
2025-06-21 12:49:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/api/task_status")
async def get_task_status(
    user: Annotated[User, Depends(get_current_user)],
    task_id: str,
) -> dict[str, str | int | float | None]:
    """Return the status of one of the current user's background tasks.

    Responds with 404 when ``task_id`` is not among the user's tasks. In
    production the ``error`` and ``traceback`` fields are always None.
    """
    if task_id not in task_service.get_user_tasks(user.email):
        raise HTTPException(status_code=404, detail="Task not found")

    status = task_service.get_task_status(task_id)

    if status.result:
        serialized_result = json.dumps(status.result)
    else:
        serialized_result = None

    # Error details are only exposed outside production.
    expose_errors = APP_ENV != "production"
    return {
        "task_id": status.task_id,
        "status": status.status,
        "result": serialized_result,
        "progress": status.progress,
        "processed": status.processed,
        "total": status.total,
        "message": status.message,
        "error": status.error if expose_errors else None,
        "traceback": status.traceback if expose_errors else None,
    }
|
2025-06-23 21:09:03 +00:00
|
|
|
|
|
|
|
|
|
2025-07-06 12:02:25 +00:00
|
|
|
@app.get("/api/tasks_for_user")
async def get_tasks_for_user(
    user: Annotated[User, Depends(get_current_user)],
) -> list[str]:
    """Return the task IDs recorded for the authenticated user."""
    task_ids = task_service.get_user_tasks(user.email)
    return task_ids
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/cancel_task")
async def cancel_task(
    user: Annotated[User, Depends(get_current_user)],
    task_id: str = Query(..., description="The task ID to cancel"),
) -> dict[str, str | bool]:
    """Cancel a running task and remove it from the user's task list.

    Responds with 404 when the task is not among the current user's tasks.
    A failure while cancelling is reported as ``{"success": False, ...}``
    rather than as an HTTP error. In production the failure message is
    generic, so internal error text is not returned to the client.
    """
    # Verify user owns this task
    if task_id not in task_service.get_user_tasks(user.email):
        raise HTTPException(status_code=404, detail="Task not found or not owned by user")

    try:
        task_service.cancel_task(task_id, user_email=user.email)
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Failed to cancel task %s: %s", task_id, e)
        message = str(e) if APP_ENV != "production" else "Failed to cancel task"
        return {"success": False, "message": message}

    logger.info(f"Task {task_id} cancelled by {user.email}")
    return {"success": True, "message": "Task cancelled"}
|
2025-07-06 12:02:25 +00:00
|
|
|
|
|
|
|
|
|
2026-02-01 20:40:07 +00:00
|
|
|
@app.post("/api/clear_all_tasks")
async def clear_all_tasks(
    user: Annotated[User, Depends(get_current_user)],
) -> dict[str, str | int | bool]:
    """Clear all tasks for the current user.

    Returns the number of tasks cleared. A failure is reported as
    ``{"success": False, "count": 0, ...}`` rather than as an HTTP error.
    In production the failure message is generic, so internal error text is
    not returned to the client.
    """
    try:
        count = task_service.clear_all_tasks(user.email)
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Failed to clear tasks for %s: %s", user.email, e)
        message = str(e) if APP_ENV != "production" else "Failed to clear tasks"
        return {"success": False, "count": 0, "message": message}

    logger.info(f"Cleared {count} tasks for {user.email}")
    return {"success": True, "count": count, "message": f"Cleared {count} tasks"}
|
|
|
|
|
|
|
|
|
|
|
2025-06-23 21:09:03 +00:00
|
|
|
@app.get("/api/get_districts")
async def get_districts(
    user: Annotated[User, Depends(get_current_user)],
) -> dict[str, str]:
    """Return every district known to the district service."""
    districts = district_service.get_all_districts()
    return districts
|
2025-10-18 09:58:55 +00:00
|
|
|
|
|
|
|
|
|
2026-02-21 15:48:02 +00:00
|
|
|
class ListingDetailResponse(BaseModel):
    # Response body of the listing detail endpoint. Field order is kept
    # because it determines the order of keys in the serialized response.

    # Core listing attributes.
    id: int
    price: float
    number_of_bedrooms: int
    square_meters: Optional[float]
    agency: Optional[str]
    council_tax_band: Optional[str]
    url: str
    listing_type: str

    # Descriptive details.
    description: Optional[str]
    display_address: Optional[str]
    property_sub_type: Optional[str]
    key_features: list[str]

    # Media and price history.
    photos: list[dict]
    floorplans: list[dict]
    price_history: list[dict]

    # Attributes that only some listing types carry.
    furnish_type: Optional[str]
    available_from: Optional[str]
    service_charge: Optional[float]
    lease_left: Optional[int]

    # Data specific to the requesting user.
    decision: Optional[str]
    poi_distances: list[dict]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _detail_dict_or_empty(value: object) -> dict:
    """Return *value* unchanged when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _detail_media_items(raw_items: object, keys: tuple[str, ...]) -> list[dict]:
    """Reduce each dict entry of *raw_items* to *keys*, using "" for missing keys."""
    if not isinstance(raw_items, list):
        return []
    return [
        {key: item.get(key, "") for key in keys}
        for item in raw_items
        if isinstance(item, dict)
    ]


def _detail_user_decision(user_id: int, listing_id: int, listing_type: str) -> str | None:
    """Return the user's stored decision for one listing, or None if there is none."""
    decision_repo = DecisionRepository(engine)
    for decision in decision_repo.get_decisions_for_user(user_id):
        if decision.listing_id == listing_id and decision.listing_type == listing_type:
            return decision.decision
    return None


def _detail_poi_distances(
    user_id: int,
    listing_id: int,
    listing_type: ListingType,
) -> list[dict]:
    """Return the user's POI distance entries for one listing (empty if no POIs)."""
    poi_repo = POIRepository(engine)
    pois = {p.id: p for p in poi_repo.get_pois_for_user(user_id)}
    if not pois:
        return []
    distances = poi_repo.get_distances_for_listings([listing_id], listing_type, user_id)
    return [
        {
            "poi_id": d.poi_id,
            # A distance row may reference a POI that is not in the user's list.
            "poi_name": pois[d.poi_id].name if d.poi_id in pois else "Unknown",
            "travel_mode": d.travel_mode,
            "duration_seconds": d.duration_seconds,
            "distance_meters": d.distance_meters,
        }
        for d in distances
    ]


@app.get("/api/listing/{listing_id}/detail", response_model=ListingDetailResponse)
async def get_listing_detail(
    user: Annotated[User, Depends(get_current_user)],
    listing_id: int,
    listing_type: str = Query(default="RENT"),
) -> ListingDetailResponse:
    """Get detailed information for a single listing.

    Combines the stored listing, fields read from its ``additional_info``
    payload, the requesting user's decision and the user's POI distances.

    Raises:
        HTTPException: 422 when ``listing_type`` is not a valid
            ``ListingType``; 404 when no such listing exists.
    """
    try:
        lt = ListingType(listing_type)
    except ValueError as exc:
        # An unknown listing type is a client error, not a server fault.
        raise HTTPException(status_code=422, detail="Invalid listing_type") from exc

    repository = ListingRepository(engine)
    listings = await repository.get_listings(only_ids=[listing_id], listing_type=lt)
    if not listings:
        raise HTTPException(status_code=404, detail="Listing not found")

    listing = listings[0]
    # Both levels may be missing, null or malformed; treat those as empty.
    additional_info = _detail_dict_or_empty(listing.additional_info)
    property_info = _detail_dict_or_empty(additional_info.get("property"))

    description = _detail_dict_or_empty(property_info.get("text")).get("description")
    photos = _detail_media_items(property_info.get("images", []), ("url", "caption", "type"))
    floorplans = _detail_media_items(property_info.get("floorplans", []), ("url", "caption"))

    key_features = property_info.get("keyFeatures", [])
    if not isinstance(key_features, list):
        key_features = []

    display_address = _detail_dict_or_empty(property_info.get("address")).get("displayAddress")
    property_sub_type = property_info.get("propertySubType")
    # The payload value wins; the stored column is the fallback.
    council_tax_band = property_info.get("councilTaxBand") or listing.council_tax_band
    furnish_type_val = property_info.get("letFurnishType")
    available_from_val = property_info.get("letDateAvailable")

    # Price history
    price_history = [item.to_dict() for item in (listing.price_history or [])]

    # Service charge and lease (for BuyListing); absent attributes become None.
    service_charge: float | None = getattr(listing, "service_charge", None)
    lease_left: int | None = getattr(listing, "lease_left", None)

    # Available from (for RentListing): fall back to the model attribute.
    if available_from_val is None:
        af = getattr(listing, "available_from", None)
        if af is not None:
            available_from_val = af.isoformat() if hasattr(af, "isoformat") else str(af)

    # Furnish type (for RentListing): fall back to the model attribute.
    if furnish_type_val is None:
        ft = getattr(listing, "furnish_type", None)
        if ft is not None:
            furnish_type_val = str(ft)

    # Per-user data is only loaded when the user resolves to a database ID.
    decision_val: str | None = None
    poi_distances_list: list[dict] = []
    user_id = _get_user_id_safe(user.email)
    if user_id is not None:
        decision_val = _detail_user_decision(user_id, listing_id, listing_type)
        poi_distances_list = _detail_poi_distances(user_id, listing_id, lt)

    return ListingDetailResponse(
        id=listing.id,
        price=listing.price,
        number_of_bedrooms=listing.number_of_bedrooms,
        square_meters=listing.square_meters,
        agency=listing.agency,
        council_tax_band=council_tax_band,
        url=listing.url,
        listing_type=listing_type,
        description=description,
        display_address=display_address,
        property_sub_type=property_sub_type,
        key_features=key_features,
        photos=photos,
        floorplans=floorplans,
        price_history=price_history,
        furnish_type=furnish_type_val,
        available_from=available_from_val,
        service_charge=service_charge,
        lease_left=lease_left,
        decision=decision_val,
        poi_distances=poi_distances_list,
    )
|
|
|
|
|
|
|
|
|
|
|
2025-10-18 09:58:55 +00:00
|
|
|
# Instrument the app with the OpenTelemetry FastAPI instrumentor. This runs
# after all routes and middleware above have been registered.
FastAPIInstrumentor.instrument_app(app)
|