Refactor backend for cleaner error handling, DRY, and type safety

- DRY up rate limiter: consolidate 3 duplicated check/respond paths into _check_counter and _enforce_limit helpers, add proper type annotations
- Replace bare Exception raises with FloorplanDownloadError and RightmoveApiError; narrow catch clauses to specific exception types; fix Step base class to inherit from ABC
- Consolidate MAX_OCR_WORKERS into config/scraper_config.py; extract _find_tenure_value helper to deduplicate tenure parsing
- Extract _build_poi_distances_lookup from stream endpoint to reduce nesting
- Fix csv_exporter: optional decisions.json, NaN instead of -1 sentinels, guard against division by zero on missing square meters
- Fix notifications.py broken list[Surface]() constructor, database.py stale comments and missing type annotation, auth.py type:ignore, ui_exporter.py stale TODO
- Fix 3 pre-existing test failures: mock cache layer in streaming tests, bypass rate limiter for test isolation, fix cache invalidation test to account for two-pattern scan loop
2026-02-10 22:19:24 +00:00 · 2026-02-10 22:19:24 +00:00 · f833309297
commit f833309297
parent 6897820cc7
20 changed files with 199 additions and 178 deletions
--- a/api/rate_limiter.py
+++ b/api/rate_limiter.py
@ -4,6 +4,7 @@ from __future__ import annotations
 import logging
 import os
 import time
+from collections.abc import Awaitable, Callable
 from urllib.parse import urlparse, urlunparse

 import jwt
@ -11,6 +12,7 @@ import redis
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
+from starlette.types import ASGIApp

 from api.rate_limit_config import EndpointLimit, RateLimitConfig

@ -87,21 +89,77 @@ class _InMemoryCounter:
 class RateLimitMiddleware(BaseHTTPMiddleware):
    """Starlette middleware enforcing per-user fixed-window rate limits via Redis."""

-    def __init__(self, app, config: RateLimitConfig | None = None) -> None:  # type: ignore[no-untyped-def]
+    def __init__(self, app: ASGIApp, config: RateLimitConfig | None = None) -> None:
        super().__init__(app)
        self.config = config or RateLimitConfig.from_env()
        self._fallback = _InMemoryCounter()
        try:
-            self._redis = _get_rate_limit_redis(self.config)
+            self._redis: redis.Redis | None = _get_rate_limit_redis(self.config)  # type: ignore[type-arg]
            self._redis.ping()
        except redis.RedisError:
            logger.warning("Rate limiter: Redis unavailable at startup, will fail open")
            self._redis = None

-    async def dispatch(self, request: Request, call_next) -> Response:  # type: ignore[no-untyped-def]
+    def _check_counter(self, key: str, limit: EndpointLimit) -> tuple[bool, int, int | None]:
+        """Check rate limit counter, returning (allowed, remaining, retry_after).
+
+        Tries Redis first; falls back to the in-memory counter when Redis is unavailable or errors.
+        retry_after is None when the request is allowed or the in-memory counter is used (no TTL available).
+        """
+        if self._redis is None:
+            allowed, remaining = self._fallback.check(key, limit.max_requests, limit.window_seconds)
+            return allowed, remaining, None
+
+        try:
+            pipe = self._redis.pipeline(transaction=True)
+            pipe.incr(key)
+            pipe.ttl(key)
+            result = pipe.execute()
+            current_count: int = result[0]
+            ttl: int = result[1]
+
+            # Set expiry on first request in window
+            if ttl == -1:
+                self._redis.expire(key, limit.window_seconds)
+                ttl = limit.window_seconds
+
+            remaining = max(0, limit.max_requests - current_count)
+            allowed = current_count <= limit.max_requests
+            retry_after = max(1, ttl) if not allowed else None
+            return allowed, remaining, retry_after
+
+        except redis.RedisError as e:
+            logger.warning(f"Rate limiter Redis error, using in-memory fallback: {e}")
+            allowed, remaining = self._fallback.check(key, limit.max_requests, limit.window_seconds)
+            return allowed, remaining, None
+
+    async def _enforce_limit(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+        limit: EndpointLimit,
+        key: str,
+    ) -> Response:
+        """Check the rate limit and either reject with 429 or forward with headers."""
+        allowed, remaining, retry_after = self._check_counter(key, limit)
+
+        if not allowed:
+            headers: dict[str, str] = {
+                "X-RateLimit-Limit": str(limit.max_requests),
+                "X-RateLimit-Remaining": "0",
+            }
+            if retry_after is not None:
+                headers["Retry-After"] = str(retry_after)
+            return JSONResponse(status_code=429, content={"detail": "Rate limit exceeded"}, headers=headers)
+
+        response = await call_next(request)
+        response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
+        response.headers["X-RateLimit-Remaining"] = str(remaining)
+        return response
+
+    async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
        path = request.url.path

-        # Skip exempt paths
        if path in EXEMPT_PATHS:
            return await call_next(request)

@ -109,68 +167,6 @@ class RateLimitMiddleware(BaseHTTPMiddleware):
        if limit is None:
            return await call_next(request)

-        # Determine identity for the counter key
        identity = _extract_user_email(request) or _client_ip(request, self.config.trusted_proxy_depth)
-
-        # If Redis is unavailable, use in-memory fallback
-        if self._redis is None:
-            fallback_key = f"ratelimit:{identity}:{path}"
-            allowed, remaining = self._fallback.check(fallback_key, limit.max_requests, limit.window_seconds)
-            if not allowed:
-                return JSONResponse(
-                    status_code=429,
-                    content={"detail": "Rate limit exceeded"},
-                    headers={"X-RateLimit-Limit": str(limit.max_requests), "X-RateLimit-Remaining": "0"},
-                )
-            response = await call_next(request)
-            response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
-            response.headers["X-RateLimit-Remaining"] = str(remaining)
-            return response
-
-        redis_key = f"ratelimit:{identity}:{path}"
-        try:
-            pipe = self._redis.pipeline(transaction=True)
-            pipe.incr(redis_key)
-            pipe.ttl(redis_key)
-            result = pipe.execute()
-            current_count: int = result[0]
-            ttl: int = result[1]
-
-            # Set expiry on first request in window
-            if ttl == -1:
-                self._redis.expire(redis_key, limit.window_seconds)
-                ttl = limit.window_seconds
-
-            remaining = max(0, limit.max_requests - current_count)
-
-            if current_count > limit.max_requests:
-                retry_after = max(1, ttl)
-                return JSONResponse(
-                    status_code=429,
-                    content={"detail": "Rate limit exceeded"},
-                    headers={
-                        "Retry-After": str(retry_after),
-                        "X-RateLimit-Limit": str(limit.max_requests),
-                        "X-RateLimit-Remaining": "0",
-                    },
-                )
-
-            response = await call_next(request)
-            response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
-            response.headers["X-RateLimit-Remaining"] = str(remaining)
-            return response
-
-        except redis.RedisError as e:
-            logger.warning(f"Rate limiter Redis error, using in-memory fallback: {e}")
-            fallback_key = f"ratelimit:{identity}:{path}"
-            allowed, remaining = self._fallback.check(fallback_key, limit.max_requests, limit.window_seconds)
-            if not allowed:
-                return JSONResponse(
-                    status_code=429,
-                    content={"detail": "Rate limit exceeded"},
-                    headers={"X-RateLimit-Limit": str(limit.max_requests), "X-RateLimit-Remaining": "0"},
-                )
-            response = await call_next(request)
-            response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
-            response.headers["X-RateLimit-Remaining"] = str(remaining)
-            return response
+        key = f"ratelimit:{identity}:{path}"
+        return await self._enforce_limit(request, call_next, limit, key)