Refactor backend for cleaner error handling, DRY, and type safety

- DRY up the rate limiter: consolidate 3 duplicated check/respond paths
  into _check_counter and _enforce_limit helpers, add proper type annotations
- Replace bare Exception raises with FloorplanDownloadError and
  RightmoveApiError; narrow catch clauses to specific exception types;
  fix Step base class to inherit from ABC
- Consolidate MAX_OCR_WORKERS into config/scraper_config.py; extract
  _find_tenure_value helper to deduplicate tenure parsing
- Extract _build_poi_distances_lookup from stream endpoint to reduce nesting
- Fix csv_exporter: optional decisions.json, NaN instead of -1 sentinels,
  guard against division by zero on missing square meters
- Fix notifications.py broken list[Surface]() constructor, database.py
  stale comments and missing type annotation, auth.py type:ignore,
  ui_exporter.py stale TODO
- Fix 3 pre-existing test failures: mock cache layer in streaming tests,
  bypass rate limiter for test isolation, fix cache invalidation test to
  account for two-pattern scan loop
This commit is contained in:
Viktor Barzin 2026-02-10 22:19:24 +00:00
parent 6897820cc7
commit f833309297
No known key found for this signature in database
GPG key ID: 0EB088298288D958
20 changed files with 199 additions and 178 deletions

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import logging
import os
import time
from collections.abc import Awaitable, Callable
from urllib.parse import urlparse, urlunparse
import jwt
@ -11,6 +12,7 @@ import redis
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, Response
from starlette.types import ASGIApp
from api.rate_limit_config import EndpointLimit, RateLimitConfig
@ -87,21 +89,77 @@ class _InMemoryCounter:
class RateLimitMiddleware(BaseHTTPMiddleware):
"""Starlette middleware enforcing per-user fixed-window rate limits via Redis."""
def __init__(self, app, config: RateLimitConfig | None = None) -> None: # type: ignore[no-untyped-def]
def __init__(self, app: ASGIApp, config: RateLimitConfig | None = None) -> None:
super().__init__(app)
self.config = config or RateLimitConfig.from_env()
self._fallback = _InMemoryCounter()
try:
self._redis = _get_rate_limit_redis(self.config)
self._redis: redis.Redis | None = _get_rate_limit_redis(self.config) # type: ignore[type-arg]
self._redis.ping()
except redis.RedisError:
logger.warning("Rate limiter: Redis unavailable at startup, will fail open")
self._redis = None
async def dispatch(self, request: Request, call_next) -> Response: # type: ignore[no-untyped-def]
def _check_counter(self, key: str, limit: EndpointLimit) -> tuple[bool, int, int | None]:
    """Increment the counter for ``key`` and report whether the request fits the limit.

    Returns an ``(allowed, remaining, retry_after)`` tuple. The Redis-backed
    counter is used when a client is configured; if there is none, or a Redis
    call raises ``redis.RedisError``, the in-memory fallback counter decides
    instead. ``retry_after`` is only populated for a rejected request counted
    in Redis, because the fallback counter reports no TTL.
    """
    if self._redis is not None:
        try:
            # INCR and TTL are queued on one transactional pipeline.
            pipe = self._redis.pipeline(transaction=True)
            pipe.incr(key)
            pipe.ttl(key)
            replies = pipe.execute()
            hits: int = replies[0]
            seconds_left: int = replies[1]

            # A TTL of -1 means the key has no expiry yet: start the window now.
            if seconds_left == -1:
                self._redis.expire(key, limit.window_seconds)
                seconds_left = limit.window_seconds

            quota_left = max(0, limit.max_requests - hits)
            within_limit = hits <= limit.max_requests
            if within_limit:
                return within_limit, quota_left, None
            # Never advertise a wait shorter than one second.
            return within_limit, quota_left, max(1, seconds_left)
        except redis.RedisError as e:
            logger.warning(f"Rate limiter Redis error, using in-memory fallback: {e}")

    # Reached when no Redis client is configured or when the Redis call failed.
    within_limit, quota_left = self._fallback.check(key, limit.max_requests, limit.window_seconds)
    return within_limit, quota_left, None
async def _enforce_limit(
    self,
    request: Request,
    call_next: Callable[[Request], Awaitable[Response]],
    limit: EndpointLimit,
    key: str,
) -> Response:
    """Apply the rate limit for ``key`` to a single request.

    When the counter allows the request it is passed to ``call_next`` and the
    resulting response is annotated with the ``X-RateLimit-*`` headers.
    Otherwise a 429 JSON response is returned without calling ``call_next``;
    it carries ``Retry-After`` only when the counter supplied a value.
    """
    allowed, remaining, retry_after = self._check_counter(key, limit)
    limit_header = str(limit.max_requests)

    if allowed:
        response = await call_next(request)
        response.headers["X-RateLimit-Limit"] = limit_header
        response.headers["X-RateLimit-Remaining"] = str(remaining)
        return response

    # Rejected: the remaining quota is always reported as zero.
    rejection_headers: dict[str, str] = {
        "X-RateLimit-Limit": limit_header,
        "X-RateLimit-Remaining": "0",
    }
    if retry_after is not None:
        rejection_headers["Retry-After"] = str(retry_after)
    return JSONResponse(
        status_code=429,
        content={"detail": "Rate limit exceeded"},
        headers=rejection_headers,
    )
async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
path = request.url.path
# Skip exempt paths
if path in EXEMPT_PATHS:
return await call_next(request)
@ -109,68 +167,6 @@ class RateLimitMiddleware(BaseHTTPMiddleware):
if limit is None:
return await call_next(request)
# Determine identity for the counter key
identity = _extract_user_email(request) or _client_ip(request, self.config.trusted_proxy_depth)
# If Redis is unavailable, use in-memory fallback
if self._redis is None:
fallback_key = f"ratelimit:{identity}:{path}"
allowed, remaining = self._fallback.check(fallback_key, limit.max_requests, limit.window_seconds)
if not allowed:
return JSONResponse(
status_code=429,
content={"detail": "Rate limit exceeded"},
headers={"X-RateLimit-Limit": str(limit.max_requests), "X-RateLimit-Remaining": "0"},
)
response = await call_next(request)
response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
response.headers["X-RateLimit-Remaining"] = str(remaining)
return response
redis_key = f"ratelimit:{identity}:{path}"
try:
pipe = self._redis.pipeline(transaction=True)
pipe.incr(redis_key)
pipe.ttl(redis_key)
result = pipe.execute()
current_count: int = result[0]
ttl: int = result[1]
# Set expiry on first request in window
if ttl == -1:
self._redis.expire(redis_key, limit.window_seconds)
ttl = limit.window_seconds
remaining = max(0, limit.max_requests - current_count)
if current_count > limit.max_requests:
retry_after = max(1, ttl)
return JSONResponse(
status_code=429,
content={"detail": "Rate limit exceeded"},
headers={
"Retry-After": str(retry_after),
"X-RateLimit-Limit": str(limit.max_requests),
"X-RateLimit-Remaining": "0",
},
)
response = await call_next(request)
response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
response.headers["X-RateLimit-Remaining"] = str(remaining)
return response
except redis.RedisError as e:
logger.warning(f"Rate limiter Redis error, using in-memory fallback: {e}")
fallback_key = f"ratelimit:{identity}:{path}"
allowed, remaining = self._fallback.check(fallback_key, limit.max_requests, limit.window_seconds)
if not allowed:
return JSONResponse(
status_code=429,
content={"detail": "Rate limit exceeded"},
headers={"X-RateLimit-Limit": str(limit.max_requests), "X-RateLimit-Remaining": "0"},
)
response = await call_next(request)
response.headers["X-RateLimit-Limit"] = str(limit.max_requests)
response.headers["X-RateLimit-Remaining"] = str(remaining)
return response
key = f"ratelimit:{identity}:{path}"
return await self._enforce_limit(request, call_next, limit, key)