Refactor backend for cleaner error handling, DRY, and type safety

- DRY up the rate limiter: consolidate the 3 duplicated check/respond paths
  into _check_counter and _enforce_limit helpers; add proper type annotations
- Replace bare Exception raises with FloorplanDownloadError and
  RightmoveApiError; narrow catch clauses to specific exception types;
  fix Step base class to inherit from ABC
- Consolidate MAX_OCR_WORKERS into config/scraper_config.py; extract
  _find_tenure_value helper to deduplicate tenure parsing
- Extract _build_poi_distances_lookup from stream endpoint to reduce nesting
- Fix csv_exporter: optional decisions.json, NaN instead of -1 sentinels,
  guard against division by zero on missing square meters
- Fix notifications.py broken list[Surface]() constructor, database.py
  stale comments and missing type annotation, auth.py type:ignore,
  ui_exporter.py stale TODO
- Fix 3 pre-existing test failures: mock cache layer in streaming tests,
  bypass rate limiter for test isolation, fix cache invalidation test to
  account for two-pattern scan loop
This commit is contained in:
Viktor Barzin 2026-02-10 22:19:24 +00:00
parent 6897820cc7
commit f833309297
No known key found for this signature in database
GPG key ID: 0EB088298288D958
20 changed files with 199 additions and 178 deletions

View file

@ -1,15 +1,15 @@
from __future__ import annotations
from abc import abstractmethod
from abc import ABC, abstractmethod
import asyncio
from collections.abc import Callable
from datetime import datetime
import logging
import multiprocessing
from pathlib import Path
import re
from typing import Any
from urllib.parse import urlparse
import aiohttp
from config.scraper_config import MAX_OCR_WORKERS
from models.listing import (
BuyListing,
FurnishType,
@ -20,14 +20,12 @@ from models.listing import (
RentListing,
)
from rec import floorplan
from rec.exceptions import FloorplanDownloadError
from rec.query import detail_query
from repositories.listing_repository import ListingRepository
logger = logging.getLogger("uvicorn.error")
# Limit OCR threads to 25% of available cores to avoid starving other work.
MAX_OCR_WORKERS = max(1, multiprocessing.cpu_count() // 4)
def _parse_furnish_type(raw: str | None) -> FurnishType:
"""Normalise the raw furnish-type string from the API into a FurnishType enum."""
@ -97,13 +95,13 @@ class ListingProcessor:
step_class_name, step_class_name
)
on_step_complete(short_name)
except Exception as e:
except (ValueError, KeyError, aiohttp.ClientError, FloorplanDownloadError) as e:
logger.error(f"[{listing_id}] {step_class_name} failed: {e}")
return None
return listing
class Step:
class Step(ABC):
listing_repository: ListingRepository
listing_type: ListingType
@ -123,29 +121,32 @@ class Step:
return True
def _find_tenure_value(details: dict[str, Any], tenure_type: str) -> str | None:
"""Find a value in the tenure info content by type key."""
tenure_content = details.get("property", {}).get("tenureInfo", {}).get("content", [])
for item in tenure_content:
if item.get("type") == tenure_type:
return item.get("value")
return None
def _parse_service_charge(details: dict[str, Any]) -> float | None:
"""Parse annual service charge from the tenure info in API response."""
tenure_content = (
details.get("property", {}).get("tenureInfo", {}).get("content", [])
)
for item in tenure_content:
if item.get("type") == "annualServiceCharge":
matches = re.findall(r"([\d,.]+)", str(item.get("value", "")))
if matches:
return float(matches[0].replace(",", ""))
value = _find_tenure_value(details, "annualServiceCharge")
if value is not None:
matches = re.findall(r"([\d,.]+)", str(value))
if matches:
return float(matches[0].replace(",", ""))
return None
def _parse_lease_left(details: dict[str, Any]) -> int | None:
"""Parse remaining lease years from the tenure info in API response."""
tenure_content = (
details.get("property", {}).get("tenureInfo", {}).get("content", [])
)
for item in tenure_content:
if item.get("type") == "lengthOfLease":
matches = re.findall(r"(\d+)", str(item.get("value", "")))
if matches:
return int(matches[0])
value = _find_tenure_value(details, "lengthOfLease")
if value is not None:
matches = re.findall(r"(\d+)", str(value))
if matches:
return int(matches[0])
return None
@ -265,7 +266,7 @@ class FetchImagesStep(Step):
if response.status == 404:
return listing
if response.status != 200:
raise Exception(f"Error for {url}: {response.status}")
raise FloorplanDownloadError(url, response.status)
floorplan_path.parent.mkdir(parents=True, exist_ok=True)
with open(floorplan_path, "wb") as f:
f.write(await response.read())