From 29ba73906345895ebf8472b288c28acea44a7cdd Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Mon, 2 Feb 2026 20:08:03 +0000
Subject: [PATCH 1/5] Improve login UI with error handling and callback page

---
 crawler/frontend/src/App.tsx                  |  16 +-
 crawler/frontend/src/auth/authService.ts      |  33 +++-
 crawler/frontend/src/auth/errors.ts           |  60 +++++++
 .../frontend/src/components/AuthCallback.tsx  | 111 +++++++++++++
 .../frontend/src/components/LoginModal.tsx    | 148 ++++++++++++++----
 5 files changed, 324 insertions(+), 44 deletions(-)
 create mode 100644 crawler/frontend/src/auth/errors.ts
 create mode 100644 crawler/frontend/src/components/AuthCallback.tsx
diff --git a/crawler/frontend/src/App.tsx b/crawler/frontend/src/App.tsx
index 08be85c..5e90bdf 100644
--- a/crawler/frontend/src/App.tsx
+++ b/crawler/frontend/src/App.tsx
@@ -1,9 +1,10 @@
 import type { User } from 'oidc-client-ts';
 import { useEffect, useState, useRef, useCallback } from 'react';
 import './App.css';
-import { getUser, handleCallback } from './auth/authService';
+import { getUser } from './auth/authService';
 import AlertError from './components/AlertError';
 import LoginModal from './components/LoginModal';
+import AuthCallback from './components/AuthCallback';
 import { Map } from './components/Map';
 import { FilterPanel, type ParameterValues, DEFAULT_FILTER_VALUES } from './components/FilterPanel';
 import { Header } from './components/Header';
@@ -34,15 +35,12 @@ function App() {
   // Ref to track if initial load has been triggered
   const initialLoadTriggeredRef = useRef(false);
 
-  useEffect(() => {
-    // Check if this is a callback from Authentik (after login)
-    if (window.location.pathname === '/callback') {
-      handleCallback().then(() => {
-        window.location.href = '/'; // Redirect to home after login
-      });
-      return;
-    }
+  // Check if this is the callback route - render dedicated component
+  if (window.location.pathname === '/callback') {
+    return <AuthCallback />;
+  }
 
+  useEffect(() => {
     // Load user data
     getUser().then(setUser);
   }, []);
diff --git a/crawler/frontend/src/auth/authService.ts b/crawler/frontend/src/auth/authService.ts
index 726dbd3..c63b253 100644
--- a/crawler/frontend/src/auth/authService.ts
+++ b/crawler/frontend/src/auth/authService.ts
@@ -1,11 +1,36 @@
 import { User, UserManager } from 'oidc-client-ts';
 import { oidcConfig } from './config';
+import { parseOidcError, type AuthError } from './errors';
 
 const userManager = new UserManager(oidcConfig);
 
-export const login = () => userManager.signinRedirect();
-export const logout = () => userManager.signoutRedirect();
-export const handleCallback = () => userManager.signinRedirectCallback();
+export const login = async (): Promise<void> => {
+    try {
+        await userManager.signinRedirect();
+    } catch (error) {
+        console.error('Login redirect failed:', error);
+        throw parseOidcError(error);
+    }
+};
+
+export const logout = async (): Promise<void> => {
+    try {
+        await userManager.signoutRedirect();
+    } catch (error) {
+        console.error('Logout redirect failed:', error);
+        throw parseOidcError(error);
+    }
+};
+
+export const handleCallback = async (): Promise<User> => {
+    try {
+        const user = await userManager.signinRedirectCallback();
+        return user;
+    } catch (error) {
+        console.error('Callback handling failed:', error);
+        throw parseOidcError(error);
+    }
+};
 
 export const getUser = async (): Promise<User | null> => {
     try {
@@ -16,3 +41,5 @@ export const getUser = async (): Promise<User | null> => {
         return null;
     }
 };
+
+export type { AuthError };
diff --git a/crawler/frontend/src/auth/errors.ts b/crawler/frontend/src/auth/errors.ts
new file mode 100644
index 0000000..ce82fc5
--- /dev/null
+++ b/crawler/frontend/src/auth/errors.ts
@@ -0,0 +1,70 @@
+export enum AuthErrorType {
+  REDIRECT_FAILED = 'REDIRECT_FAILED',
+  CALLBACK_FAILED = 'CALLBACK_FAILED',
+  NETWORK_ERROR = 'NETWORK_ERROR',
+  USER_CANCELLED = 'USER_CANCELLED',
+}
+
+export interface AuthError {
+  type: AuthErrorType;
+  message: string;
+  retryable: boolean;
+}
+
+/**
+ * Converts an arbitrary thrown value into a user-facing {@link AuthError} by
+ * keyword-matching its lower-cased message. The first matching category wins,
+ * so the order of the checks below is significant.
+ *
+ * @param error - Any thrown value; non-`Error` values are stringified.
+ * @returns A plain `AuthError` object; every category is currently retryable.
+ */
+export function parseOidcError(error: unknown): AuthError {
+  const errorMessage = error instanceof Error ? error.message : String(error);
+  const errorString = errorMessage.toLowerCase();
+
+  // User cancellation is checked first: a message such as "Popup closed by
+  // user" also contains "popup" and would otherwise be reported as a blocked
+  // redirect instead of a cancellation.
+  if (errorString.includes('cancel') || errorString.includes('closed') || errorString.includes('denied')) {
+    return {
+      type: AuthErrorType.USER_CANCELLED,
+      message: 'Sign in was cancelled.',
+      retryable: true,
+    };
+  }
+
+  // Popup/redirect blocked errors
+  if (errorString.includes('popup') || errorString.includes('blocked') || errorString.includes('window')) {
+    return {
+      type: AuthErrorType.REDIRECT_FAILED,
+      message: 'Unable to redirect. Please check if popups are blocked.',
+      retryable: true,
+    };
+  }
+
+  // Network errors ('fetch' also matches "failed to fetch")
+  if (errorString.includes('network') || errorString.includes('fetch') || errorString.includes('timeout')) {
+    return {
+      type: AuthErrorType.NETWORK_ERROR,
+      message: 'Unable to reach authentication server. Please check your connection.',
+      retryable: true,
+    };
+  }
+
+  // Callback/state errors ('state' also matches "no matching state")
+  if (errorString.includes('state') || errorString.includes('invalid') || errorString.includes('mismatch')) {
+    return {
+      type: AuthErrorType.CALLBACK_FAILED,
+      message: 'Login verification failed. Please try again.',
+      retryable: true,
+    };
+  }
+
+  // Fallback: surface the raw message, or a generic one when it is empty
+  return {
+    type: AuthErrorType.CALLBACK_FAILED,
+    message: errorMessage || 'An unexpected error occurred during sign in.',
+    retryable: true,
+  };
+}
diff --git a/crawler/frontend/src/components/AuthCallback.tsx b/crawler/frontend/src/components/AuthCallback.tsx
new file mode 100644
index 0000000..165a4e3
--- /dev/null
+++ b/crawler/frontend/src/components/AuthCallback.tsx
@@ -0,0 +1,124 @@
+import React, { useEffect, useRef, useState } from 'react';
+import { handleCallback, login, type AuthError } from '@/auth/authService';
+import { Loader2, CheckCircle, AlertCircle, Home } from 'lucide-react';
+import { Button } from './ui/button';
+
+type CallbackState = 'processing' | 'success' | 'error';
+
+/**
+ * Full-page view for the `/callback` route. Completes the sign-in once on
+ * mount, then shows a spinner, a success notice (followed by a redirect to
+ * `/`), or the error message with "Try Again" / "Go Home" actions.
+ */
+const AuthCallback: React.FC = () => {
+  const [state, setState] = useState<CallbackState>('processing');
+  const [error, setError] = useState<AuthError | null>(null);
+  // One-shot guard: the sign-in response must be processed once per page load.
+  const callbackStartedRef = useRef(false);
+
+  useEffect(() => {
+    // React StrictMode re-runs mount effects in development. A second
+    // handleCallback() would find the OIDC sign-in state already consumed by
+    // the first call, reject, and flip a successful login into the error view.
+    if (callbackStartedRef.current) return;
+    callbackStartedRef.current = true;
+
+    const processCallback = async () => {
+      try {
+        await handleCallback();
+        setState('success');
+        // Auto-redirect after success
+        setTimeout(() => {
+          window.location.href = '/';
+        }, 1500);
+      } catch (err) {
+        setError(err as AuthError);
+        setState('error');
+      }
+    };
+
+    processCallback();
+  }, []);
+
+  const handleRetry = async () => {
+    setState('processing');
+    setError(null);
+    try {
+      await login();
+    } catch (err) {
+      setError(err as AuthError);
+      setState('error');
+    }
+  };
+
+  const handleGoHome = () => {
+    window.location.href = '/';
+  };
+
+  return (
+    <div className="min-h-screen flex items-center justify-center bg-background p-4">
+      <div className="w-full max-w-md">
+        <div className="bg-card border rounded-xl shadow-lg p-8">
+          {state === 'processing' && (
+            <div className="text-center space-y-4">
+              <div className="flex justify-center">
+                <div className="p-4 bg-primary/10 rounded-full">
+                  <Loader2 className="h-8 w-8 text-primary animate-spin" />
+                </div>
+              </div>
+              <div className="space-y-2">
+                <h1 className="text-xl font-semibold">Completing Sign In</h1>
+                <p className="text-muted-foreground">
+                  Please wait while we verify your credentials...
+                </p>
+              </div>
+            </div>
+          )}
+
+          {state === 'success' && (
+            <div className="text-center space-y-4">
+              <div className="flex justify-center">
+                <div className="p-4 bg-green-500/10 rounded-full">
+                  <CheckCircle className="h-8 w-8 text-green-500" />
+                </div>
+              </div>
+              <div className="space-y-2">
+                <h1 className="text-xl font-semibold">Welcome Back!</h1>
+                <p className="text-muted-foreground">
+                  Redirecting you to the dashboard...
+                </p>
+              </div>
+            </div>
+          )}
+
+          {state === 'error' && (
+            <div className="text-center space-y-6">
+              <div className="flex justify-center">
+                <div className="p-4 bg-destructive/10 rounded-full">
+                  <AlertCircle className="h-8 w-8 text-destructive" />
+                </div>
+              </div>
+              <div className="space-y-2">
+                <h1 className="text-xl font-semibold">Sign In Failed</h1>
+                <p className="text-muted-foreground">
+                  {error?.message || 'An unexpected error occurred.'}
+                </p>
+              </div>
+              <div className="flex flex-col sm:flex-row gap-3 justify-center">
+                <Button onClick={handleRetry} className="gap-2">
+                  Try Again
+                </Button>
+                <Button variant="outline" onClick={handleGoHome} className="gap-2">
+                  <Home className="h-4 w-4" />
+                  Go Home
+                </Button>
+              </div>
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+};
+
+export default AuthCallback;
diff --git a/crawler/frontend/src/components/LoginModal.tsx b/crawler/frontend/src/components/LoginModal.tsx
index 556f9fd..1eaf4c5 100644
--- a/crawler/frontend/src/components/LoginModal.tsx
+++ b/crawler/frontend/src/components/LoginModal.tsx
@@ -1,43 +1,127 @@
-import { login } from '@/auth/authService';
+import { login, type AuthError } from '@/auth/authService';
 import { Button } from "@/components/ui/button";
 import { DialogDescription } from '@radix-ui/react-dialog';
 import React, { useState } from 'react';
 import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from './ui/dialog';
+import { Home, LogIn, AlertCircle, Loader2 } from 'lucide-react';
 
-interface ModalProps {
-    isOpen: boolean;
+interface LoginModalProps {
+  isOpen: boolean;
 }
 
-const Modal: React.FC<ModalProps> = ({
-    isOpen,
-}) => {
-    if (!isOpen) return null;
-    const [isLoading, setIsLoading] = useState(false)
+const LoginModal: React.FC<LoginModalProps> = ({ isOpen }) => {
+  const [isLoading, setIsLoading] = useState(false);
+  const [error, setError] = useState<AuthError | null>(null);
 
-    return (
-        <Dialog open={isOpen}>
-            <form>
-                <DialogContent className="sm:max-w-[425px]">
-                    <DialogHeader>
-                        <DialogTitle>Login to Wrongmove</DialogTitle>
-                        <DialogDescription>(We are currently in closed beta; ask Viktor to send you an invitation)</DialogDescription>
+  if (!isOpen) return null;
 
-                    </DialogHeader>
-                    <DialogFooter>
-                        {isLoading && (
-                            <div>Signing in. Please wait...</div>
-                        )
-                        }
-                        <Button onClick={
-                            () => {
-                                setIsLoading(true)
-                                login()
-                            }} disabled={isLoading}>Login</Button>
-                    </DialogFooter>
-                </DialogContent>
-            </form>
-        </Dialog>
-    )
+  const handleLogin = async () => {
+    setIsLoading(true);
+    setError(null);
+    try {
+      await login();
+    } catch (err) {
+      setError(err as AuthError);
+      setIsLoading(false);
+    }
+  };
+
+  const handleRetry = () => {
+    setError(null);
+    handleLogin();
+  };
+
+  const handleCancel = () => {
+    setError(null);
+    setIsLoading(false);
+  };
+
+  return (
+    <Dialog open={isOpen}>
+      <DialogContent className="sm:max-w-[425px]">
+        <DialogHeader className="space-y-4">
+          <div className="flex items-center gap-3">
+            <div className="p-2 bg-primary/10 rounded-lg">
+              <Home className="h-6 w-6 text-primary" />
+            </div>
+            <div>
+              <DialogTitle className="text-xl">Wrongmove</DialogTitle>
+              <DialogDescription className="text-sm text-muted-foreground">
+                Your smart property search companion
+              </DialogDescription>
+            </div>
+          </div>
+        </DialogHeader>
+
+        <div className="py-4 space-y-4">
+          {/* Beta Notice */}
+          <div className="bg-muted/50 border rounded-lg p-4 text-sm">
+            <p className="text-muted-foreground">
+              We are currently in closed beta. Please contact Viktor to request an invitation.
+            </p>
+          </div>
+
+          {/* Error State */}
+          {error && (
+            <div className="bg-destructive/10 border border-destructive/30 rounded-lg p-4 flex items-start gap-3">
+              <AlertCircle className="h-5 w-5 text-destructive shrink-0 mt-0.5" />
+              <div className="flex-1 space-y-2">
+                <p className="text-sm text-destructive">{error.message}</p>
+                <div className="flex gap-2">
+                  <Button
+                    size="sm"
+                    variant="outline"
+                    onClick={handleRetry}
+                    className="text-destructive border-destructive/30 hover:bg-destructive/10"
+                  >
+                    Try Again
+                  </Button>
+                  <Button
+                    size="sm"
+                    variant="ghost"
+                    onClick={handleCancel}
+                  >
+                    Cancel
+                  </Button>
+                </div>
+              </div>
+            </div>
+          )}
+
+          {/* Loading State */}
+          {isLoading && !error && (
+            <div className="flex items-center justify-center gap-3 py-4 text-muted-foreground">
+              <Loader2 className="h-5 w-5 animate-spin" />
+              <span>Redirecting to login...</span>
+            </div>
+          )}
+        </div>
+
+        <DialogFooter>
+          {!error && (
+            <Button
+              onClick={handleLogin}
+              disabled={isLoading}
+              className="w-full gap-2"
+              size="lg"
+            >
+              {isLoading ? (
+                <>
+                  <Loader2 className="h-4 w-4 animate-spin" />
+                  Signing in...
+                </>
+              ) : (
+                <>
+                  <LogIn className="h-4 w-4" />
+                  Sign in with SSO
+                </>
+              )}
+            </Button>
+          )}
+        </DialogFooter>
+      </DialogContent>
+    </Dialog>
+  );
 };
 
-export default Modal;
+export default LoginModal;

From e8293c60421cc3d335da41cb3d53bef11e25d22d Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Mon, 2 Feb 2026 21:57:45 +0000
Subject: [PATCH 2/5] Add intelligent query splitting to maximize Rightmove
 data extraction

---
 crawler/.env.sample                       |   9 +
 crawler/CLAUDE.md                         | 233 ++++++++++
 crawler/config/__init__.py                |   3 +-
 crawler/config/scraper_config.py          |  65 +++
 crawler/poetry.lock                       | 526 +++++++++++++++++++++-
 crawler/pyproject.toml                    |  31 +-
 crawler/rec/query.py                      | 190 +++++++-
 crawler/services/listing_fetcher.py       | 146 ++++++
 crawler/services/query_splitter.py        | 303 +++++++++++++
 crawler/tasks/listing_tasks.py            | 203 +++++----
 crawler/tests/unit/test_query_splitter.py | 374 +++++++++++++++
 11 files changed, 1970 insertions(+), 113 deletions(-)
 create mode 100644 crawler/CLAUDE.md
 create mode 100644 crawler/config/scraper_config.py
 create mode 100644 crawler/services/listing_fetcher.py
 create mode 100644 crawler/services/query_splitter.py
 create mode 100644 crawler/tests/unit/test_query_splitter.py

diff --git a/crawler/.env.sample b/crawler/.env.sample
index 1e7fe00..aa1c93d 100644
--- a/crawler/.env.sample
+++ b/crawler/.env.sample
@@ -7,6 +7,15 @@ export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLit
 export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks
 export CELERY_RESULT_BACKEND="redis://localhost:6379/1"
 
+# Rightmove scraper configuration
+# These settings control query splitting to work around Rightmove's ~1500 result cap
+export RIGHTMOVE_MAX_CONCURRENT=5        # Max concurrent HTTP requests
+export RIGHTMOVE_REQUEST_DELAY_MS=100    # Delay between requests in milliseconds
+export RIGHTMOVE_SPLIT_THRESHOLD=1200    # Split query when results exceed this threshold
+export RIGHTMOVE_MIN_PRICE_BAND=100      # Minimum price band width (won't split below this)
+export RIGHTMOVE_MAX_PAGES=60            # Max pages per subquery (60 * 25 = 1500 max results)
+export RIGHTMOVE_PROXY_URL=              # Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor)
+
 # Periodic scraping schedules (JSON array)
 # Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types
 # Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday)
diff --git a/crawler/CLAUDE.md b/crawler/CLAUDE.md
new file mode 100644
index 0000000..14714ab
--- /dev/null
+++ b/crawler/CLAUDE.md
@@ -0,0 +1,233 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+A real estate listing crawler and aggregator that scrapes property listings from Rightmove UK, extracts square meter data from floorplan images using OCR, calculates transit routes, and provides a web UI for browsing listings.
+
+## Development Environment
+
+**IMPORTANT**: This project runs on a remote host, not locally. Always use the remote executor to run commands:
+
+- **All shell commands** (Python, pytest, poetry, alembic, etc.) must be executed via the remote executor
+- **Starting the project**: Use the remote executor to run `./start.sh`
+- **Running tests**: Use the remote executor to run `pytest`
+- **Any CLI operations**: Use the remote executor to run `python main.py ...`
+
+Never run commands directly on the local machine - always route them through the remote executor.
+
+## Commands
+
+### Setup and Run (Docker - Recommended)
+
+```bash
+# Start all services (Redis, MySQL, API, Celery) with Docker
+./start.sh
+
+# Rebuild images and start
+./start.sh --build
+
+# Stop all containers
+./start.sh --down
+
+# View logs
+./start.sh --logs
+```
+
+### Setup and Run (Local with Poetry)
+
+```bash
+# Install dependencies
+poetry install && cp .env.sample .env
+
+# Start backend locally (requires Redis running)
+./start.sh --local
+
+# Start frontend (from frontend/ directory)
+cd frontend && ./start.sh
+```
+
+### CLI Operations
+
+The main CLI (`main.py`) uses Click with a `--data-dir` option (default: `data/rs/`):
+
+```bash
+# Dump listings from Rightmove API
+python main.py dump-listings --type rent --min-price 2000 --max-price 4000 --min-bedrooms 2
+
+# Download floorplan images
+python main.py dump-images
+
+# Extract square meters from floorplans using OCR
+python main.py detect-floorplan
+
+# Calculate transit routes (consumes Google Maps API calls)
+python main.py routing --destination-address 'Address' -m transit -l 10
+
+# Export to GeoJSON for visualization
+python main.py export-immoweb -O output.js --type rent [filter options]
+```
+
+### Testing
+
+```bash
+# Run tests with coverage
+pytest tests/ -v --cov=. --cov-report=term-missing
+
+# Run type checker
+mypy .
+```
+
+### Database Migrations
+
+```bash
+alembic upgrade head    # Apply migrations
+alembic revision -m "description"  # Create new migration
+```
+
+### Code Formatting
+
+```bash
+yapf --style .style.yapf --recursive .
+```
+
+## Architecture
+
+### Core Data Flow
+
+1. **Scraping** (`rec/query.py`): Fetches listing IDs and details from Rightmove's Android API
+2. **Processing** (`listing_processor.py`): Pipeline with steps for fetching details, downloading images, and OCR detection
+3. **Storage**: SQLModel/SQLAlchemy with MySQL or SQLite, plus JSON files in `data/rs/<listing_id>/`
+4. **API** (`api/app.py`): FastAPI endpoints authenticated via JWT from external Authentik service
+5. **Background Tasks** (`tasks/listing_tasks.py`): Celery tasks for async listing processing with Redis broker
+
+### Key Models
+
+- `models/listing.py`: SQLModel entities (`RentListing`, `BuyListing`) with `QueryParameters` for filtering
+- `data_access.py`: **DEPRECATED** - Legacy `Listing` dataclass for filesystem-based data access. Use `models.listing.RentListing` or `models.listing.BuyListing` instead.
+
+### Services Layer (Unified CLI and API)
+
+**IMPORTANT**: The `services/` directory contains unified handler functions that both the CLI and HTTP API use. This ensures consistency and code reuse.
+
+#### High-level services (use these in CLI and API):
+- **`listing_service.py`**: Listing operations
+  - `get_listings()` - Retrieve listings from database
+  - `refresh_listings()` - Fetch new listings from Rightmove (sync or async)
+  - `download_images()` - Download floorplan images
+  - `detect_floorplans()` - Run OCR on floorplans
+  - `calculate_routes()` - Calculate transit routes
+
+- **`export_service.py`**: Export operations
+  - `export_to_csv()` - Export listings to CSV file
+  - `export_to_geojson()` - Export listings to GeoJSON (file or in-memory)
+
+- **`district_service.py`**: District management
+  - `get_all_districts()` - Get district name → region ID mapping
+  - `get_district_names()` - Get list of district names
+  - `validate_districts()` - Validate district names
+
+- **`task_service.py`**: Background task management
+  - `get_task_status()` - Get Celery task status
+  - `get_user_tasks()` - Get all tasks for a user
+  - `add_task_for_user()` - Associate task with user
+
+#### Low-level services (internal implementation):
+- `listing_fetcher.py`: Fetches listing data from Rightmove API
+- `image_fetcher.py`: Downloads floorplan images
+- `floorplan_detector.py`: OCR-based square meter detection
+- `route_calculator.py`: Calculates transit routes using Google Maps API
+- `query_splitter.py`: Intelligent query splitting to maximize data extraction
+
+### Query Splitting System
+
+Rightmove's API caps search results at ~1,500 listings per query. The query splitting system works around this limitation to fetch **all matching listings**.
+
+#### How it works:
+
+1. **Initial Split**: Queries are split by district and bedroom count
+2. **Probe**: Each subquery is probed (minimal API request) to get `totalAvailableResults`
+3. **Adaptive Split**: If results exceed threshold (1,200), the price range is binary-split
+4. **Recursive Refinement**: Splitting continues until all subqueries are under threshold
+5. **Full Fetch**: Each subquery fetches up to 60 pages (1,500 results max)
+
+```
+Original: 2BR, £1000-£5000 → 3,000 results (over cap!)
+              ↓ split by price
+£1000-£3000: 1,800 (still over!)  |  £3000-£5000: 1,200 ✓
+        ↓ split again
+£1000-£2000: 900 ✓  |  £2000-£3000: 900 ✓
+
+Final: 3 subqueries → 900 + 900 + 1,200 = 3,000 total results ✓
+```
+
+#### Key components:
+- `config/scraper_config.py`: Configuration with env var loading
+- `services/query_splitter.py`: `QuerySplitter` class with `SubQuery` dataclass
+- `rec/query.py`: `probe_query()` for result count probing, `create_session()` for connection pooling
+
+### Processing Pipeline
+
+`ListingProcessor` runs sequential steps defined in `listing_processor.py`:
+1. `FetchListingDetailsStep` - Get property details from API
+2. `FetchImagesStep` - Download floorplan images
+3. `DetectFloorplanStep` - OCR to extract square meters from floorplans
+
+### Floorplan OCR
+
+`rec/floorplan.py` uses pytesseract with image preprocessing (adaptive thresholding) to extract square meter values from floorplan images.
+
+### Repository Pattern
+
+`repositories/listing_repository.py` handles database operations with SQLModel sessions.
+
+## Environment Variables
+
+- `DB_CONNECTION_STRING`: Database URL (SQLite default: `sqlite:///data/wrongmove.db`)
+- `CELERY_BROKER_URL` / `CELERY_RESULT_BACKEND`: Redis URLs
+- `ROUTING_API_KEY`: Google Maps API key for transit routing
+
+### Scraper Configuration
+
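+These control the query splitting behavior (see `.env.sample` for defaults). `ScraperConfig.from_env()` also reads `RIGHTMOVE_RESULT_CAP` (default 1500), which is not listed in the table below: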
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `RIGHTMOVE_MAX_CONCURRENT` | 5 | Max concurrent HTTP requests |
+| `RIGHTMOVE_REQUEST_DELAY_MS` | 100 | Delay between requests (ms) |
+| `RIGHTMOVE_SPLIT_THRESHOLD` | 1200 | Split query when results exceed this |
+| `RIGHTMOVE_MIN_PRICE_BAND` | 100 | Minimum price band width (won't split below) |
+| `RIGHTMOVE_MAX_PAGES` | 60 | Max pages per subquery (60 × 25 = 1500) |
+| `RIGHTMOVE_PROXY_URL` | - | SOCKS proxy URL (e.g., `socks5://localhost:9050` for Tor) |
+
+## Project Structure
+
+- `main.py`: CLI entry point
+- `api/`: FastAPI application with auth middleware
+- `config/`: Configuration modules (scraper settings, scheduled tasks)
+- `models/`: SQLModel database entities
+- `repositories/`: Database access layer
+- `rec/`: Core business logic (query, floorplan OCR, routing, districts)
+- `services/`: Service layer modules (listing_fetcher, image_fetcher, floorplan_detector, route_calculator, query_splitter)
+- `tasks/`: Celery background tasks
+- `frontend/`: React/Vite frontend with Caddy proxy
+- `alembic/`: Database migrations
+- `tests/`: Test suite (unit and integration tests)
+
+## Type Checking
+
+The project uses strict mypy configuration with `disallow_untyped_defs=true`. Run `mypy .` to check types.
+
+## Exploration Preferences
+
+- Always ignore `node_modules` directory when exploring the codebase
+
+## Git Workflow
+
+**IMPORTANT**: After completing work items, always create separate commits for each logical change:
+- Keep each commit focused on one feature/fix
+- Do not include unrelated files
+- Use descriptive commit messages
+- Group related files together (e.g., tests with the code they test)
+
diff --git a/crawler/config/__init__.py b/crawler/config/__init__.py
index 315e8c3..b82264c 100644
--- a/crawler/config/__init__.py
+++ b/crawler/config/__init__.py
@@ -1,4 +1,5 @@
 """Configuration modules."""
 from config.schedule_config import ScheduleConfig, SchedulesConfig
+from config.scraper_config import ScraperConfig
 
-__all__ = ["ScheduleConfig", "SchedulesConfig"]
+__all__ = ["ScheduleConfig", "SchedulesConfig", "ScraperConfig"]
diff --git a/crawler/config/scraper_config.py b/crawler/config/scraper_config.py
new file mode 100644
index 0000000..e84c1d5
--- /dev/null
+++ b/crawler/config/scraper_config.py
@@ -0,0 +1,87 @@
+"""Scraper configuration with environment variable loading."""
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Self
+
+
+def _env_int(name: str, default: int) -> int:
+    """Read an integer setting from the environment.
+
+    Args:
+        name: Environment variable name.
+        default: Value returned when the variable is unset or blank.
+
+    Returns:
+        The parsed integer, or ``default``.
+
+    Raises:
+        ValueError: If the variable holds a non-integer value; the message
+            names the variable so a bad ``.env`` entry is easy to locate.
+    """
+    raw = os.environ.get(name, "").strip()
+    if not raw:
+        # A blank assignment (``NAME=``) counts as "not configured", matching
+        # how ``ScraperConfig.from_env`` treats a blank RIGHTMOVE_PROXY_URL.
+        return default
+    try:
+        return int(raw)
+    except ValueError as exc:
+        raise ValueError(
+            f"Environment variable {name} must be an integer, got {raw!r}"
+        ) from exc
+
+
+@dataclass(frozen=True)
+class ScraperConfig:
+    """Configuration for the Rightmove scraper.
+
+    Attributes:
+        max_concurrent_requests: Maximum number of concurrent HTTP requests.
+        request_delay_ms: Delay between requests in milliseconds.
+        result_cap: Maximum results Rightmove returns per query (their limit).
+        split_threshold: When results exceed this, split the query further.
+        min_price_band: Minimum width of a price band (won't split below this).
+        max_pages_per_query: Maximum pages to fetch per subquery (60 * 25 = 1500).
+        proxy_url: Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor).
+    """
+
+    max_concurrent_requests: int = 5
+    request_delay_ms: int = 100
+    result_cap: int = 1500
+    split_threshold: int = 1200  # Split when approaching cap
+    min_price_band: int = 100  # Minimum band width in currency units
+    max_pages_per_query: int = 60  # 60 * 25 = 1500 results max
+    proxy_url: str | None = None
+
+    @classmethod
+    def from_env(cls) -> Self:
+        """Load configuration from environment variables.
+
+        Unset or blank variables fall back to the defaults listed below.
+
+        Environment variables:
+            RIGHTMOVE_MAX_CONCURRENT: Max concurrent requests (default: 5)
+            RIGHTMOVE_REQUEST_DELAY_MS: Request delay in ms (default: 100)
+            RIGHTMOVE_RESULT_CAP: Result cap per query (default: 1500)
+            RIGHTMOVE_SPLIT_THRESHOLD: Split threshold (default: 1200)
+            RIGHTMOVE_MIN_PRICE_BAND: Minimum price band width (default: 100)
+            RIGHTMOVE_MAX_PAGES: Max pages per query (default: 60)
+            RIGHTMOVE_PROXY_URL: SOCKS proxy URL (default: None)
+
+        Returns:
+            ScraperConfig instance with values from environment or defaults.
+
+        Raises:
+            ValueError: If an integer variable holds a non-integer value.
+        """
+        return cls(
+            max_concurrent_requests=_env_int("RIGHTMOVE_MAX_CONCURRENT", 5),
+            request_delay_ms=_env_int("RIGHTMOVE_REQUEST_DELAY_MS", 100),
+            result_cap=_env_int("RIGHTMOVE_RESULT_CAP", 1500),
+            split_threshold=_env_int("RIGHTMOVE_SPLIT_THRESHOLD", 1200),
+            min_price_band=_env_int("RIGHTMOVE_MIN_PRICE_BAND", 100),
+            max_pages_per_query=_env_int("RIGHTMOVE_MAX_PAGES", 60),
+            proxy_url=os.environ.get("RIGHTMOVE_PROXY_URL") or None,
+        )
diff --git a/crawler/poetry.lock b/crawler/poetry.lock
index 87eabca..c822ef9 100644
--- a/crawler/poetry.lock
+++ b/crawler/poetry.lock
@@ -6,7 +6,7 @@ version = "2.6.1"
 description = "Happy Eyeballs for asyncio"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
     {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
@@ -18,7 +18,7 @@ version = "3.12.13"
 description = "Async http client/server framework (asyncio)"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5421af8f22a98f640261ee48aae3a37f0c41371e99412d55eaf2f8a46d5dad29"},
     {file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fcda86f6cb318ba36ed8f1396a6a4a3fd8f856f84d426584392083d10da4de0"},
@@ -120,13 +120,29 @@ yarl = ">=1.17.0,<2.0"
 [package.extras]
 speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "brotlicffi ; platform_python_implementation != \"CPython\""]
 
+[[package]]
+name = "aioresponses"
+version = "0.7.8"
+description = "Mock out requests made by ClientSession from aiohttp package"
+optional = false
+python-versions = "*"
+groups = ["dev"]
+files = [
+    {file = "aioresponses-0.7.8-py2.py3-none-any.whl", hash = "sha256:b73bd4400d978855e55004b23a3a84cb0f018183bcf066a85ad392800b5b9a94"},
+    {file = "aioresponses-0.7.8.tar.gz", hash = "sha256:b861cdfe5dc58f3b8afac7b0a6973d5d7b2cb608dd0f6253d16b8ee8eaf6df11"},
+]
+
+[package.dependencies]
+aiohttp = ">=3.3.0,<4.0.0"
+packaging = ">=22.0"
+
 [[package]]
 name = "aiosignal"
 version = "1.3.2"
 description = "aiosignal: a list of registered asynchronous callbacks"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"},
     {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"},
@@ -400,7 +416,7 @@ version = "5.0.1"
 description = "Timeout context manager for asyncio programs"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
+groups = ["main", "dev"]
 markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\""
 files = [
     {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
@@ -932,13 +948,118 @@ mypy = ["bokeh", "contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.15.0)", "
 test = ["Pillow", "contourpy[test-no-images]", "matplotlib"]
 test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"]
 
+[[package]]
+name = "coverage"
+version = "7.13.2"
+description = "Code coverage measurement for Python"
+optional = false
+python-versions = ">=3.10"
+groups = ["dev"]
+files = [
+    {file = "coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b"},
+    {file = "coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2"},
+    {file = "coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896"},
+    {file = "coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c"},
+    {file = "coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc"},
+    {file = "coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5"},
+    {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31"},
+    {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad"},
+    {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f"},
+    {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8"},
+    {file = "coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c"},
+    {file = "coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99"},
+    {file = "coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e"},
+    {file = "coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e"},
+    {file = "coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508"},
+    {file = "coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b"},
+    {file = "coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b"},
+    {file = "coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f"},
+    {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3"},
+    {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b"},
+    {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1"},
+    {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059"},
+    {file = "coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031"},
+    {file = "coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e"},
+    {file = "coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28"},
+    {file = "coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d"},
+    {file = "coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3"},
+    {file = "coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99"},
+    {file = "coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f"},
+    {file = "coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f"},
+    {file = "coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa"},
+    {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce"},
+    {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94"},
+    {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5"},
+    {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b"},
+    {file = "coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41"},
+    {file = "coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e"},
+    {file = "coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894"},
+    {file = "coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6"},
+    {file = "coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc"},
+    {file = "coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f"},
+    {file = "coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1"},
+    {file = "coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9"},
+    {file = "coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c"},
+    {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5"},
+    {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4"},
+    {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c"},
+    {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31"},
+    {file = "coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8"},
+    {file = "coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb"},
+    {file = "coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557"},
+    {file = "coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e"},
+    {file = "coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7"},
+    {file = "coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3"},
+    {file = "coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3"},
+    {file = "coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421"},
+    {file = "coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5"},
+    {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23"},
+    {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c"},
+    {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f"},
+    {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573"},
+    {file = "coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343"},
+    {file = "coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47"},
+    {file = "coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7"},
+    {file = "coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef"},
+    {file = "coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f"},
+    {file = "coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5"},
+    {file = "coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4"},
+    {file = "coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27"},
+    {file = "coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548"},
+    {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660"},
+    {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92"},
+    {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82"},
+    {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892"},
+    {file = "coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe"},
+    {file = "coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859"},
+    {file = "coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6"},
+    {file = "coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b"},
+    {file = "coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417"},
+    {file = "coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee"},
+    {file = "coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1"},
+    {file = "coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d"},
+    {file = "coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6"},
+    {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a"},
+    {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04"},
+    {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f"},
+    {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f"},
+    {file = "coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3"},
+    {file = "coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba"},
+    {file = "coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c"},
+    {file = "coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5"},
+    {file = "coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3"},
+]
+
+[package.extras]
+toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
+
 [[package]]
 name = "cryptography"
 version = "45.0.4"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
 optional = false
 python-versions = "!=3.9.0,!=3.9.1,>=3.7"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "cryptography-45.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:425a9a6ac2823ee6e46a76a21a4e8342d8fa5c01e08b823c1f19a8b74f096069"},
     {file = "cryptography-45.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:680806cf63baa0039b920f4976f5f31b10e772de42f16310a6839d9f21a26b0d"},
@@ -1132,6 +1253,30 @@ files = [
 [package.extras]
 tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""]
 
+[[package]]
+name = "fakeredis"
+version = "2.33.0"
+description = "Python implementation of redis API, can be used for testing purposes."
+optional = false
+python-versions = ">=3.7"
+groups = ["dev"]
+files = [
+    {file = "fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965"},
+    {file = "fakeredis-2.33.0.tar.gz", hash = "sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770"},
+]
+
+[package.dependencies]
+redis = {version = ">=4.3", markers = "python_version > \"3.8\""}
+sortedcontainers = ">=2"
+
+[package.extras]
+bf = ["pyprobables (>=0.6)"]
+cf = ["pyprobables (>=0.6)"]
+json = ["jsonpath-ng (>=1.6)"]
+lua = ["lupa (>=2.1)"]
+probabilistic = ["pyprobables (>=0.6)"]
+valkey = ["valkey (>=6) ; python_version >= \"3.8\""]
+
 [[package]]
 name = "fastapi"
 version = "0.115.13"
@@ -1309,7 +1454,7 @@ version = "1.7.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"},
     {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"},
@@ -1676,14 +1821,14 @@ test = ["Cython (>=0.29.24)"]
 
 [[package]]
 name = "httpx"
-version = "0.28.1"
+version = "0.27.2"
 description = "The next generation HTTP client."
 optional = false
 python-versions = ">=3.8"
 groups = ["main", "dev"]
 files = [
-    {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
-    {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
+    {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
+    {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
 ]
 
 [package.dependencies]
@@ -1691,6 +1836,7 @@ anyio = "*"
 certifi = "*"
 httpcore = "==1.*"
 idna = "*"
+sniffio = "*"
 
 [package.extras]
 brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
@@ -1777,6 +1923,18 @@ perf = ["ipython"]
 test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"]
 type = ["pytest-mypy"]
 
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+description = "brain-dead simple config-ini parsing"
+optional = false
+python-versions = ">=3.10"
+groups = ["dev"]
+files = [
+    {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"},
+    {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
+]
+
 [[package]]
 name = "ipdb"
 version = "0.13.13"
@@ -2360,6 +2518,93 @@ interegular = ["interegular (>=0.3.1,<0.4.0)"]
 nearley = ["js2py"]
 regex = ["regex"]
 
+[[package]]
+name = "librt"
+version = "0.7.8"
+description = "Mypyc runtime library"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+markers = "platform_python_implementation != \"PyPy\""
+files = [
+    {file = "librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d"},
+    {file = "librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b"},
+    {file = "librt-0.7.8-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6938cc2de153bc927ed8d71c7d2f2ae01b4e96359126c602721340eb7ce1a92d"},
+    {file = "librt-0.7.8-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:66daa6ac5de4288a5bbfbe55b4caa7bf0cd26b3269c7a476ffe8ce45f837f87d"},
+    {file = "librt-0.7.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4864045f49dc9c974dadb942ac56a74cd0479a2aafa51ce272c490a82322ea3c"},
+    {file = "librt-0.7.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a36515b1328dc5b3ffce79fe204985ca8572525452eacabee2166f44bb387b2c"},
+    {file = "librt-0.7.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b7e7f140c5169798f90b80d6e607ed2ba5059784968a004107c88ad61fb3641d"},
+    {file = "librt-0.7.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff71447cb778a4f772ddc4ce360e6ba9c95527ed84a52096bd1bbf9fee2ec7c0"},
+    {file = "librt-0.7.8-cp310-cp310-win32.whl", hash = "sha256:047164e5f68b7a8ebdf9fae91a3c2161d3192418aadd61ddd3a86a56cbe3dc85"},
+    {file = "librt-0.7.8-cp310-cp310-win_amd64.whl", hash = "sha256:d6f254d096d84156a46a84861183c183d30734e52383602443292644d895047c"},
+    {file = "librt-0.7.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3e9c11aa260c31493d4b3197d1e28dd07768594a4f92bec4506849d736248f"},
+    {file = "librt-0.7.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddb52499d0b3ed4aa88746aaf6f36a08314677d5c346234c3987ddc506404eac"},
+    {file = "librt-0.7.8-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e9c0afebbe6ce177ae8edba0c7c4d626f2a0fc12c33bb993d163817c41a7a05c"},
+    {file = "librt-0.7.8-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:631599598e2c76ded400c0a8722dec09217c89ff64dc54b060f598ed68e7d2a8"},
+    {file = "librt-0.7.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c1ba843ae20db09b9d5c80475376168feb2640ce91cd9906414f23cc267a1ff"},
+    {file = "librt-0.7.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b5b007bb22ea4b255d3ee39dfd06d12534de2fcc3438567d9f48cdaf67ae1ae3"},
+    {file = "librt-0.7.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dbd79caaf77a3f590cbe32dc2447f718772d6eea59656a7dcb9311161b10fa75"},
+    {file = "librt-0.7.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:87808a8d1e0bd62a01cafc41f0fd6818b5a5d0ca0d8a55326a81643cdda8f873"},
+    {file = "librt-0.7.8-cp311-cp311-win32.whl", hash = "sha256:31724b93baa91512bd0a376e7cf0b59d8b631ee17923b1218a65456fa9bda2e7"},
+    {file = "librt-0.7.8-cp311-cp311-win_amd64.whl", hash = "sha256:978e8b5f13e52cf23a9e80f3286d7546baa70bc4ef35b51d97a709d0b28e537c"},
+    {file = "librt-0.7.8-cp311-cp311-win_arm64.whl", hash = "sha256:20e3946863d872f7cabf7f77c6c9d370b8b3d74333d3a32471c50d3a86c0a232"},
+    {file = "librt-0.7.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b6943885b2d49c48d0cff23b16be830ba46b0152d98f62de49e735c6e655a63"},
+    {file = "librt-0.7.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46ef1f4b9b6cc364b11eea0ecc0897314447a66029ee1e55859acb3dd8757c93"},
+    {file = "librt-0.7.8-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:907ad09cfab21e3c86e8f1f87858f7049d1097f77196959c033612f532b4e592"},
+    {file = "librt-0.7.8-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2991b6c3775383752b3ca0204842743256f3ad3deeb1d0adc227d56b78a9a850"},
+    {file = "librt-0.7.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03679b9856932b8c8f674e87aa3c55ea11c9274301f76ae8dc4d281bda55cf62"},
+    {file = "librt-0.7.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3968762fec1b2ad34ce57458b6de25dbb4142713e9ca6279a0d352fa4e9f452b"},
+    {file = "librt-0.7.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb7a7807523a31f03061288cc4ffc065d684c39db7644c676b47d89553c0d714"},
+    {file = "librt-0.7.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad64a14b1e56e702e19b24aae108f18ad1bf7777f3af5fcd39f87d0c5a814449"},
+    {file = "librt-0.7.8-cp312-cp312-win32.whl", hash = "sha256:0241a6ed65e6666236ea78203a73d800dbed896cf12ae25d026d75dc1fcd1dac"},
+    {file = "librt-0.7.8-cp312-cp312-win_amd64.whl", hash = "sha256:6db5faf064b5bab9675c32a873436b31e01d66ca6984c6f7f92621656033a708"},
+    {file = "librt-0.7.8-cp312-cp312-win_arm64.whl", hash = "sha256:57175aa93f804d2c08d2edb7213e09276bd49097611aefc37e3fa38d1fb99ad0"},
+    {file = "librt-0.7.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4c3995abbbb60b3c129490fa985dfe6cac11d88fc3c36eeb4fb1449efbbb04fc"},
+    {file = "librt-0.7.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:44e0c2cbc9bebd074cf2cdbe472ca185e824be4e74b1c63a8e934cea674bebf2"},
+    {file = "librt-0.7.8-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4d2f1e492cae964b3463a03dc77a7fe8742f7855d7258c7643f0ee32b6651dd3"},
+    {file = "librt-0.7.8-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:451e7ffcef8f785831fdb791bd69211f47e95dc4c6ddff68e589058806f044c6"},
+    {file = "librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3469e1af9f1380e093ae06bedcbdd11e407ac0b303a56bbe9afb1d6824d4982d"},
+    {file = "librt-0.7.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f11b300027ce19a34f6d24ebb0a25fd0e24a9d53353225a5c1e6cadbf2916b2e"},
+    {file = "librt-0.7.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4adc73614f0d3c97874f02f2c7fd2a27854e7e24ad532ea6b965459c5b757eca"},
+    {file = "librt-0.7.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60c299e555f87e4c01b2eca085dfccda1dde87f5a604bb45c2906b8305819a93"},
+    {file = "librt-0.7.8-cp313-cp313-win32.whl", hash = "sha256:b09c52ed43a461994716082ee7d87618096851319bf695d57ec123f2ab708951"},
+    {file = "librt-0.7.8-cp313-cp313-win_amd64.whl", hash = "sha256:f8f4a901a3fa28969d6e4519deceab56c55a09d691ea7b12ca830e2fa3461e34"},
+    {file = "librt-0.7.8-cp313-cp313-win_arm64.whl", hash = "sha256:43d4e71b50763fcdcf64725ac680d8cfa1706c928b844794a7aa0fa9ac8e5f09"},
+    {file = "librt-0.7.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:be927c3c94c74b05128089a955fba86501c3b544d1d300282cc1b4bd370cb418"},
+    {file = "librt-0.7.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7b0803e9008c62a7ef79058233db7ff6f37a9933b8f2573c05b07ddafa226611"},
+    {file = "librt-0.7.8-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:79feb4d00b2a4e0e05c9c56df707934f41fcb5fe53fd9efb7549068d0495b758"},
+    {file = "librt-0.7.8-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9122094e3f24aa759c38f46bd8863433820654927370250f460ae75488b66ea"},
+    {file = "librt-0.7.8-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e03bea66af33c95ce3addf87a9bf1fcad8d33e757bc479957ddbc0e4f7207ac"},
+    {file = "librt-0.7.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f1ade7f31675db00b514b98f9ab9a7698c7282dad4be7492589109471852d398"},
+    {file = "librt-0.7.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a14229ac62adcf1b90a15992f1ab9c69ae8b99ffb23cb64a90878a6e8a2f5b81"},
+    {file = "librt-0.7.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5bcaaf624fd24e6a0cb14beac37677f90793a96864c67c064a91458611446e83"},
+    {file = "librt-0.7.8-cp314-cp314-win32.whl", hash = "sha256:7aa7d5457b6c542ecaed79cec4ad98534373c9757383973e638ccced0f11f46d"},
+    {file = "librt-0.7.8-cp314-cp314-win_amd64.whl", hash = "sha256:3d1322800771bee4a91f3b4bd4e49abc7d35e65166821086e5afd1e6c0d9be44"},
+    {file = "librt-0.7.8-cp314-cp314-win_arm64.whl", hash = "sha256:5363427bc6a8c3b1719f8f3845ea53553d301382928a86e8fab7984426949bce"},
+    {file = "librt-0.7.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ca916919793a77e4a98d4a1701e345d337ce53be4a16620f063191f7322ac80f"},
+    {file = "librt-0.7.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:54feb7b4f2f6706bb82325e836a01be805770443e2400f706e824e91f6441dde"},
+    {file = "librt-0.7.8-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:39a4c76fee41007070f872b648cc2f711f9abf9a13d0c7162478043377b52c8e"},
+    {file = "librt-0.7.8-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac9c8a458245c7de80bc1b9765b177055efff5803f08e548dd4bb9ab9a8d789b"},
+    {file = "librt-0.7.8-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b67aa7eff150f075fda09d11f6bfb26edffd300f6ab1666759547581e8f666"},
+    {file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:535929b6eff670c593c34ff435d5440c3096f20fa72d63444608a5aef64dd581"},
+    {file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:63937bd0f4d1cb56653dc7ae900d6c52c41f0015e25aaf9902481ee79943b33a"},
+    {file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf243da9e42d914036fd362ac3fa77d80a41cadcd11ad789b1b5eec4daaf67ca"},
+    {file = "librt-0.7.8-cp314-cp314t-win32.whl", hash = "sha256:171ca3a0a06c643bd0a2f62a8944e1902c94aa8e5da4db1ea9a8daf872685365"},
+    {file = "librt-0.7.8-cp314-cp314t-win_amd64.whl", hash = "sha256:445b7304145e24c60288a2f172b5ce2ca35c0f81605f5299f3fa567e189d2e32"},
+    {file = "librt-0.7.8-cp314-cp314t-win_arm64.whl", hash = "sha256:8766ece9de08527deabcd7cb1b4f1a967a385d26e33e536d6d8913db6ef74f06"},
+    {file = "librt-0.7.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c7e8f88f79308d86d8f39c491773cbb533d6cb7fa6476f35d711076ee04fceb6"},
+    {file = "librt-0.7.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:389bd25a0db916e1d6bcb014f11aa9676cedaa485e9ec3752dfe19f196fd377b"},
+    {file = "librt-0.7.8-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73fd300f501a052f2ba52ede721232212f3b06503fa12665408ecfc9d8fd149c"},
+    {file = "librt-0.7.8-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d772edc6a5f7835635c7562f6688e031f0b97e31d538412a852c49c9a6c92d5"},
+    {file = "librt-0.7.8-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde8a130bd0f239e45503ab39fab239ace094d63ee1d6b67c25a63d741c0f71"},
+    {file = "librt-0.7.8-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fdec6e2368ae4f796fc72fad7fd4bd1753715187e6d870932b0904609e7c878e"},
+    {file = "librt-0.7.8-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:00105e7d541a8f2ee5be52caacea98a005e0478cfe78c8080fbb7b5d2b340c63"},
+    {file = "librt-0.7.8-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c6f8947d3dfd7f91066c5b4385812c18be26c9d5a99ca56667547f2c39149d94"},
+    {file = "librt-0.7.8-cp39-cp39-win32.whl", hash = "sha256:41d7bb1e07916aeb12ae4a44e3025db3691c4149ab788d0315781b4d29b86afb"},
+    {file = "librt-0.7.8-cp39-cp39-win_amd64.whl", hash = "sha256:e90a8e237753c83b8e484d478d9a996dc5e39fd5bd4c6ce32563bc8123f132be"},
+    {file = "librt-0.7.8.tar.gz", hash = "sha256:1a4ede613941d9c3470b0368be851df6bb78ab218635512d0370b27a277a0862"},
+]
+
 [[package]]
 name = "mako"
 version = "1.3.10"
@@ -2595,7 +2840,7 @@ version = "6.5.0"
 description = "multidict implementation"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "multidict-6.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e118a202904623b1d2606d1c8614e14c9444b59d64454b0c355044058066469"},
     {file = "multidict-6.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a42995bdcaff4e22cb1280ae7752c3ed3fbb398090c6991a2797a4a0e5ed16a9"},
@@ -2709,6 +2954,79 @@ files = [
     {file = "multidict-6.5.0.tar.gz", hash = "sha256:942bd8002492ba819426a8d7aefde3189c1b87099cdf18aaaefefcf7f3f7b6d2"},
 ]
 
+[[package]]
+name = "mypy"
+version = "1.19.1"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec"},
+    {file = "mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b"},
+    {file = "mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6"},
+    {file = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"},
+    {file = "mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1"},
+    {file = "mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac"},
+    {file = "mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288"},
+    {file = "mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab"},
+    {file = "mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6"},
+    {file = "mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331"},
+    {file = "mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925"},
+    {file = "mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042"},
+    {file = "mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1"},
+    {file = "mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e"},
+    {file = "mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2"},
+    {file = "mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8"},
+    {file = "mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a"},
+    {file = "mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13"},
+    {file = "mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250"},
+    {file = "mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b"},
+    {file = "mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e"},
+    {file = "mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef"},
+    {file = "mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75"},
+    {file = "mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd"},
+    {file = "mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1"},
+    {file = "mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718"},
+    {file = "mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b"},
+    {file = "mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045"},
+    {file = "mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957"},
+    {file = "mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f"},
+    {file = "mypy-1.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bcfc336a03a1aaa26dfce9fff3e287a3ba99872a157561cbfcebe67c13308e3"},
+    {file = "mypy-1.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b7951a701c07ea584c4fe327834b92a30825514c868b1f69c30445093fdd9d5a"},
+    {file = "mypy-1.19.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b13cfdd6c87fc3efb69ea4ec18ef79c74c3f98b4e5498ca9b85ab3b2c2329a67"},
+    {file = "mypy-1.19.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f28f99c824ecebcdaa2e55d82953e38ff60ee5ec938476796636b86afa3956e"},
+    {file = "mypy-1.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c608937067d2fc5a4dd1a5ce92fd9e1398691b8c5d012d66e1ddd430e9244376"},
+    {file = "mypy-1.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:409088884802d511ee52ca067707b90c883426bd95514e8cfda8281dc2effe24"},
+    {file = "mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247"},
+    {file = "mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba"},
+]
+
+[package.dependencies]
+librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""}
+mypy_extensions = ">=1.0.0"
+pathspec = ">=0.9.0"
+typing_extensions = ">=4.6.0"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+faster-cache = ["orjson"]
+install-types = ["pip"]
+mypyc = ["setuptools (>=50)"]
+reports = ["lxml"]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.1.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"},
+    {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"},
+]
+
 [[package]]
 name = "mysqlclient"
 version = "2.2.7"
@@ -3226,6 +3544,24 @@ files = [
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["docopt", "pytest"]
 
+[[package]]
+name = "pathspec"
+version = "1.0.4"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"},
+    {file = "pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645"},
+]
+
+[package.extras]
+hyperscan = ["hyperscan (>=0.7)"]
+optional = ["typing-extensions (>=4)"]
+re2 = ["google-re2 (>=1.1)"]
+tests = ["pytest (>=9)", "typing-extensions (>=4.15)"]
+
 [[package]]
 name = "pexpect"
 version = "4.9.0"
@@ -3357,6 +3693,22 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-a
 test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"]
 type = ["mypy (>=1.14.1)"]
 
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
+    {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
+]
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["coverage", "pytest", "pytest-benchmark"]
+
 [[package]]
 name = "podman-compose"
 version = "1.5.0"
@@ -3412,7 +3764,7 @@ version = "0.3.2"
 description = "Accelerated property cache"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"},
     {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"},
@@ -3777,6 +4129,66 @@ files = [
 packaging = ">=21.3"
 Pillow = ">=8.0.0"
 
+[[package]]
+name = "pytest"
+version = "8.4.2"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"},
+    {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"},
+]
+
+[package.dependencies]
+colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
+iniconfig = ">=1"
+packaging = ">=20"
+pluggy = ">=1.5,<2"
+pygments = ">=2.7.2"
+
+[package.extras]
+dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
+
+[[package]]
+name = "pytest-asyncio"
+version = "0.23.8"
+description = "Pytest support for asyncio"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2"},
+    {file = "pytest_asyncio-0.23.8.tar.gz", hash = "sha256:759b10b33a6dc61cce40a8bd5205e302978bbbcc00e279a8b61d9a6a3c82e4d3"},
+]
+
+[package.dependencies]
+pytest = ">=7.0.0,<9"
+
+[package.extras]
+docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
+testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
+
+[[package]]
+name = "pytest-cov"
+version = "4.1.0"
+description = "Pytest plugin for measuring coverage."
+optional = false
+python-versions = ">=3.7"
+groups = ["dev"]
+files = [
+    {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"},
+    {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"},
+]
+
+[package.dependencies]
+coverage = {version = ">=5.2.1", extras = ["toml"]}
+pytest = ">=4.6"
+
+[package.extras]
+testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -4069,7 +4481,7 @@ version = "6.2.0"
 description = "Python client for Redis database and key-value store"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "redis-6.2.0-py3-none-any.whl", hash = "sha256:c8ddf316ee0aab65f04a11229e94a64b2618451dab7a67cb2f77eb799d872d5e"},
     {file = "redis-6.2.0.tar.gz", hash = "sha256:e821f129b75dde6cb99dd35e5c76e8c49512a5a0d8dfdc560b2fbd44b85ca977"},
@@ -4603,6 +5015,18 @@ files = [
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
 ]
 
+[[package]]
+name = "sortedcontainers"
+version = "2.4.0"
+description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
+optional = false
+python-versions = "*"
+groups = ["dev"]
+files = [
+    {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"},
+    {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
+]
+
 [[package]]
 name = "soupsieve"
 version = "2.8"
@@ -5005,6 +5429,37 @@ rich = ">=10.11.0"
 shellingham = ">=1.3.0"
 typing-extensions = ">=3.7.4.3"
 
+[[package]]
+name = "types-cffi"
+version = "1.17.0.20250915"
+description = "Typing stubs for cffi"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "types_cffi-1.17.0.20250915-py3-none-any.whl", hash = "sha256:cef4af1116c83359c11bb4269283c50f0688e9fc1d7f0eeb390f3661546da52c"},
+    {file = "types_cffi-1.17.0.20250915.tar.gz", hash = "sha256:4362e20368f78dabd5c56bca8004752cc890e07a71605d9e0d9e069dbaac8c06"},
+]
+
+[package.dependencies]
+types-setuptools = "*"
+
+[[package]]
+name = "types-pyopenssl"
+version = "24.1.0.20240722"
+description = "Typing stubs for pyOpenSSL"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"},
+    {file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"},
+]
+
+[package.dependencies]
+cryptography = ">=35.0.0"
+types-cffi = "*"
+
 [[package]]
 name = "types-python-dateutil"
 version = "2.9.0.20250822"
@@ -5017,6 +5472,49 @@ files = [
     {file = "types_python_dateutil-2.9.0.20250822.tar.gz", hash = "sha256:84c92c34bd8e68b117bff742bc00b692a1e8531262d4507b33afcc9f7716cd53"},
 ]
 
+[[package]]
+name = "types-redis"
+version = "4.6.0.20241004"
+description = "Typing stubs for redis"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"},
+    {file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"},
+]
+
+[package.dependencies]
+cryptography = ">=35.0.0"
+types-pyOpenSSL = "*"
+
+[[package]]
+name = "types-requests"
+version = "2.32.4.20260107"
+description = "Typing stubs for requests"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "types_requests-2.32.4.20260107-py3-none-any.whl", hash = "sha256:b703fe72f8ce5b31ef031264fe9395cac8f46a04661a79f7ed31a80fb308730d"},
+    {file = "types_requests-2.32.4.20260107.tar.gz", hash = "sha256:018a11ac158f801bfa84857ddec1650750e393df8a004a8a9ae2a9bec6fcb24f"},
+]
+
+[package.dependencies]
+urllib3 = ">=2"
+
+[[package]]
+name = "types-setuptools"
+version = "80.10.0.20260124"
+description = "Typing stubs for setuptools"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "types_setuptools-80.10.0.20260124-py3-none-any.whl", hash = "sha256:efed7e044f01adb9c2806c7a8e1b6aa3656b8e382379b53d5f26ee3db24d4c01"},
+    {file = "types_setuptools-80.10.0.20260124.tar.gz", hash = "sha256:1b86d9f0368858663276a0cbe5fe5a9722caf94b5acde8aba0399a6e90680f20"},
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.14.0"
@@ -5569,7 +6067,7 @@ version = "1.20.1"
 description = "Yet another URL library"
 optional = false
 python-versions = ">=3.9"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"},
     {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"},
@@ -5705,4 +6203,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">3.11"
-content-hash = "5f53cec7fc3cc93d494341e9fd6562076c1a8952f83075f671a3507c50fcb334"
+content-hash = "10a74594d9f695ab1077ff992bcd012b93b174b25c3f2ca681d6308653abbd14"
diff --git a/crawler/pyproject.toml b/crawler/pyproject.toml
index 7e2d1a1..4028cf9 100644
--- a/crawler/pyproject.toml
+++ b/crawler/pyproject.toml
@@ -20,6 +20,7 @@ matplotlib = "^3.10.0"
 opencv-python = "^4.11.0.86"
 click = "^8.2.0"
 aiohttp = "^3.11.18"
+aiohttp-socks = "^0.8.4"
 sqlmodel = "^0.0.24"
 alembic = "^1.16.1"
 sqlalchemy = {extras = ["asyncio"], version = "^2.0.41"}
@@ -42,6 +43,15 @@ mysqlclient = "^2.2.7"
 ipdb = "^0.13.13"
 jupyterlab = "^4.4.7"
 podman-compose = "^1.5.0"
+pytest = "^8.0.0"
+pytest-asyncio = "^0.23.0"
+pytest-cov = "^4.1.0"
+httpx = "^0.27.0"
+aioresponses = "^0.7.6"
+fakeredis = "^2.21.0"
+mypy = "^1.8.0"
+types-requests = "^2.31.0"
+types-redis = "^4.6.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
@@ -52,4 +62,23 @@ build-backend = "poetry.core.masonry.api"
 lint.ignore = [
     "E741", # Ambigious name
 ]
-exclude = ["*.ipynb"]
\ No newline at end of file
+exclude = ["*.ipynb"]
+
+[tool.pytest.ini_options]
+# asyncio_default_fixture_loop_scope needs pytest-asyncio >= 0.24 (pinned: ^0.23.0).
+asyncio_mode = "auto"
+testpaths = ["tests"]
+
+[tool.mypy]
+python_version = "3.11"
+warn_return_any = true
+warn_unused_ignores = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+strict_optional = true
+plugins = ["pydantic.mypy"]
+
+[[tool.mypy.overrides]]
+module = ["transformers.*", "pytesseract.*", "cv2.*", "celery.*", "tqdm.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "geopy.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*"]
+ignore_missing_imports = true
\ No newline at end of file
diff --git a/crawler/rec/query.py b/crawler/rec/query.py
index b589876..a2526a6 100644
--- a/crawler/rec/query.py
+++ b/crawler/rec/query.py
@@ -1,16 +1,19 @@
 import enum
 from typing import Any
+from contextlib import asynccontextmanager
+from collections.abc import AsyncIterator
+
 import aiohttp
 from models.listing import FurnishType, ListingType
 from rec import districts
 from tenacity import retry, stop_after_attempt, wait_random
+from config.scraper_config import ScraperConfig
 
 
-headers = {
+DEFAULT_HEADERS = {
     "Host": "api.rightmove.co.uk",
-    # 'Accept-Encoding': 'gzip, deflate, br',
-    "User-Agent": "okhttp/4.10.0",
-    "Connection": "close",
+    "User-Agent": "okhttp/4.12.0",
+    "Connection": "keep-alive",
 }
 
 
@@ -24,15 +27,66 @@ class PropertyType(enum.StrEnum):
     TERRACED = "terraced"
 
 
+@asynccontextmanager
+async def create_session(
+    config: ScraperConfig | None = None,
+) -> AsyncIterator[aiohttp.ClientSession]:
+    """Create an aiohttp session with optional proxy support.
+
+    Args:
+        config: Scraper configuration. Loads from environment if not provided.
+
+    Yields:
+        Configured aiohttp ClientSession.
+    """
+    if config is None:
+        config = ScraperConfig.from_env()
+
+    connector = None
+    if config.proxy_url:
+        try:
+            from aiohttp_socks import ProxyConnector
+
+            connector = ProxyConnector.from_url(config.proxy_url)
+        except ImportError:
+            raise ImportError(
+                "aiohttp-socks is required for proxy support. "
+                "Install with: pip install aiohttp-socks"
+            )
+
+    session = aiohttp.ClientSession(
+        trust_env=True,
+        connector=connector,
+        headers=DEFAULT_HEADERS,
+    )
+    try:
+        yield session
+    finally:
+        await session.close()
+
+
 @retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
-async def detail_query(detail_id: int) -> dict[str, Any]:
+async def detail_query(
+    detail_id: int,
+    session: aiohttp.ClientSession | None = None,
+) -> dict[str, Any]:
+    """Fetch detailed property information.
+
+    Args:
+        detail_id: The property identifier.
+        session: Optional aiohttp session. Creates new one if not provided.
+
+    Returns:
+        Property details as a dictionary.
+    """
     params = {
         "apiApplication": "ANDROID",
         "appVersion": "3.70.0",
     }
     url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
-    async with aiohttp.ClientSession(trust_env=True) as session:
-        async with session.get(url, params=params, headers=headers) as response:
+
+    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
+        async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
             if response.status != 200:
                 raise Exception(
                     f"""id: {detail_id}. Status Code: {response.status}."""
@@ -40,6 +94,12 @@ async def detail_query(detail_id: int) -> dict[str, Any]:
                 )
             return await response.json()
 
+    if session:
+        return await do_request(session)
+    else:
+        async with aiohttp.ClientSession(trust_env=True) as new_session:
+            return await do_request(new_session)
+
 
 @retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
 async def listing_query(
@@ -57,7 +117,29 @@ async def listing_query(
     property_type: list[PropertyType] = [],
     page_size: int = 25,
     furnish_types: list[FurnishType] = [],
+    session: aiohttp.ClientSession | None = None,
 ) -> dict[str, Any]:
+    """Execute a listing search query.
+
+    Args:
+        page: Page number to fetch (1-indexed).
+        channel: Listing type (BUY or RENT).
+        min_bedrooms: Minimum number of bedrooms.
+        max_bedrooms: Maximum number of bedrooms.
+        radius: Search radius.
+        min_price: Minimum price.
+        max_price: Maximum price.
+        district: District identifier string.
+        mustNewHome: Filter for new homes only (BUY only).
+        max_days_since_added: Maximum days since listing was added (BUY only).
+        property_type: List of property types to filter (BUY only).
+        page_size: Number of results per page (default 25).
+        furnish_types: List of furnish types to filter (RENT only).
+        session: Optional aiohttp session. Creates new one if not provided.
+
+    Returns:
+        API response as a dictionary.
+    """
     params: dict[str, str] = {
         "locationIdentifier": districts.get_districts()[district],
         "channel": str(channel).upper(),
@@ -95,19 +177,105 @@ async def listing_query(
         if furnish_types:
             params["furnishTypes"] = ",".join(furnish_types)
 
-    headers = {
+    request_headers = {
         "Host": "api.rightmove.co.uk",
         "Accept-Encoding": "gzip, deflate, br",
         "User-Agent": "okhttp/4.12.0",
         "Connection": "keep-alive",
     }
 
-    async with aiohttp.ClientSession(trust_env=True) as session:
-        async with session.get(
+    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
+        async with s.get(
             "https://api.rightmove.co.uk/api/property-listing",
             params=params,
-            headers=headers,
+            headers=request_headers,
         ) as response:
             if response.status != 200:
                 raise Exception(f"Failed due to: {await response.text()}")
             return await response.json()
+
+    if session:
+        return await do_request(session)
+    else:
+        async with aiohttp.ClientSession(trust_env=True) as new_session:
+            return await do_request(new_session)
+
+
+@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3))
+async def probe_query(
+    *,
+    session: aiohttp.ClientSession,
+    channel: ListingType,
+    min_bedrooms: int,
+    max_bedrooms: int,
+    radius: float,
+    min_price: int,
+    max_price: int,
+    district: str,
+    max_days_since_added: int = 30,
+    furnish_types: list[FurnishType] = [],
+) -> dict[str, Any]:
+    """Probe the API to get result count without fetching full results.
+
+    Makes a minimal request (page_size=1) to efficiently get totalAvailableResults.
+
+    Args:
+        session: aiohttp session for making requests.
+        channel: Listing type (BUY or RENT).
+        min_bedrooms: Minimum number of bedrooms.
+        max_bedrooms: Maximum number of bedrooms.
+        radius: Search radius.
+        min_price: Minimum price.
+        max_price: Maximum price.
+        district: District identifier string.
+        max_days_since_added: Maximum days since listing was added (BUY only).
+        furnish_types: List of furnish types to filter (RENT only).
+
+    Returns:
+        API response containing totalAvailableResults.
+    """
+    params: dict[str, str] = {
+        "locationIdentifier": districts.get_districts()[district],
+        "channel": str(channel).upper(),
+        "page": "1",
+        "numberOfPropertiesPerPage": "1",  # Minimal page size for probing
+        "radius": str(radius),
+        "sortBy": "distance",
+        "includeUnavailableProperties": "false",
+        "minPrice": str(min_price),
+        "maxPrice": str(max_price),
+        "minBedrooms": str(min_bedrooms),
+        "maxBedrooms": str(max_bedrooms),
+        "apiApplication": "ANDROID",
+        "appVersion": "4.28.0",
+    }
+
+    if channel is ListingType.BUY:
+        params["dontShow"] = "sharedOwnership,retirement"
+        if max_days_since_added is not None and max_days_since_added in [
+            1,
+            3,
+            7,
+            14,
+        ]:
+            params["maxDaysSinceAdded"] = str(max_days_since_added)
+
+    if channel is ListingType.RENT:
+        if furnish_types:
+            params["furnishTypes"] = ",".join(furnish_types)
+
+    request_headers = {
+        "Host": "api.rightmove.co.uk",
+        "Accept-Encoding": "gzip, deflate, br",
+        "User-Agent": "okhttp/4.12.0",
+        "Connection": "keep-alive",
+    }
+
+    async with session.get(
+        "https://api.rightmove.co.uk/api/property-listing",
+        params=params,
+        headers=request_headers,
+    ) as response:
+        if response.status != 200:
+            raise Exception(f"Probe failed: {await response.text()}")
+        return await response.json()
diff --git a/crawler/services/listing_fetcher.py b/crawler/services/listing_fetcher.py
new file mode 100644
index 0000000..a94f3e0
--- /dev/null
+++ b/crawler/services/listing_fetcher.py
@@ -0,0 +1,146 @@
+"""Listing fetcher service - fetches listing data from Rightmove API."""
+import asyncio
+import logging
+from typing import Any
+
+from config.scraper_config import ScraperConfig
+from listing_processor import ListingProcessor
+from rec.query import create_session, listing_query
+from models.listing import QueryParameters
+from repositories import ListingRepository
+from tqdm.asyncio import tqdm
+from models import Listing as modelListing
+from services.query_splitter import QuerySplitter, SubQuery
+
+logger = logging.getLogger("uvicorn.error")
+
+
+async def dump_listings_full(
+    parameters: QueryParameters,
+    repository: ListingRepository,
+) -> list[modelListing]:
+    """Fetches all listings, images as well as detects floorplans."""
+    new_listings = await dump_listings(parameters, repository)
+    logger.debug(f"Upserted {len(new_listings)} new listings")
+    # refresh listings
+    listings = await repository.get_listings(parameters)  # this can be better
+    new_listings = [x for x in listings if x.id in new_listings]
+    return new_listings
+
+
+async def dump_listings(
+    parameters: QueryParameters,
+    repository: ListingRepository,
+) -> list[modelListing]:
+    """Fetch listings from Rightmove API and process them.
+
+    Uses intelligent query splitting to maximize data extraction
+    while respecting Rightmove's result caps.
+    """
+    config = ScraperConfig.from_env()
+    splitter = QuerySplitter(config)
+
+    async with create_session(config) as session:
+        # Phase 1 & 2: Split and probe queries
+        logger.info("Splitting query and probing result counts...")
+        subqueries = await splitter.split(parameters, session)
+
+        total_estimated = splitter.calculate_total_estimated_results(subqueries)
+        logger.info(
+            f"Split into {len(subqueries)} subqueries, "
+            f"estimated {total_estimated} total results"
+        )
+
+        # Phase 3: Fetch all pages for each subquery
+        semaphore = asyncio.Semaphore(config.max_concurrent_requests)
+
+        async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
+            """Fetch all pages for a single subquery."""
+            results: list[dict[str, Any]] = []
+
+            estimated = sq.estimated_results or 0
+            if estimated == 0:
+                return results
+
+            page_size = parameters.page_size
+            max_pages = min(
+                config.max_pages_per_query,
+                (estimated // page_size) + 1,
+            )
+
+            for page_id in range(1, max_pages + 1):
+                async with semaphore:
+                    await asyncio.sleep(config.request_delay_ms / 1000)
+                    try:
+                        result = await listing_query(
+                            page=page_id,
+                            channel=parameters.listing_type,
+                            min_bedrooms=sq.min_bedrooms,
+                            max_bedrooms=sq.max_bedrooms,
+                            radius=parameters.radius,
+                            min_price=sq.min_price,
+                            max_price=sq.max_price,
+                            district=sq.district,
+                            page_size=page_size,
+                            max_days_since_added=parameters.max_days_since_added,
+                            furnish_types=parameters.furnish_types or [],
+                            session=session,
+                        )
+                        results.append(result)
+
+                        properties = result.get("properties", [])
+                        if len(properties) < page_size:
+                            break
+
+                    except Exception as e:
+                        if "GENERIC_ERROR" in str(e):
+                            logger.debug(
+                                f"Max page for {sq.district}: {page_id - 1}"
+                            )
+                            break
+                        logger.warning(
+                            f"Error fetching page {page_id} for {sq.district}: {e}"
+                        )
+                        break
+
+            return results
+
+        # Fetch all subqueries with progress bar
+        all_results = await tqdm.gather(
+            *[fetch_subquery(sq) for sq in subqueries],
+            desc="Fetching listings",
+        )
+
+    # Extract listing identifiers from results
+    listing_ids: list[int] = []
+    for subquery_results in all_results:
+        for response_json in subquery_results:
+            if not response_json:
+                continue
+            if response_json.get("totalAvailableResults", 0) == 0:
+                continue
+            for property_data in response_json.get("properties", []):
+                identifier = property_data.get("identifier")
+                if identifier:
+                    listing_ids.append(identifier)
+
+    logger.info(f"Found {len(listing_ids)} total listings")
+
+    # Deduplicate
+    unique_ids = list(set(listing_ids))
+    logger.info(f"After deduplication: {len(unique_ids)} unique listings")
+
+    # Filter out listings already in database
+    all_listing_ids = [x.id for x in await repository.get_listings()]
+    missing_ids = [
+        listing_id for listing_id in unique_ids if listing_id not in all_listing_ids
+    ]
+
+    listing_processor = ListingProcessor(repository)
+    logger.info(f"Starting processing {len(missing_ids)} new listings")
+    processed_listings = await tqdm.gather(
+        *[listing_processor.process_listing(id) for id in missing_ids]
+    )
+    filtered_listings = [x for x in processed_listings if x is not None]
+
+    return filtered_listings
diff --git a/crawler/services/query_splitter.py b/crawler/services/query_splitter.py
new file mode 100644
index 0000000..0609634
--- /dev/null
+++ b/crawler/services/query_splitter.py
@@ -0,0 +1,303 @@
+"""Query splitting service for handling Rightmove's result cap.
+
+This module provides intelligent query splitting to work around Rightmove's
+~1,500 listing cap per search. It adaptively splits queries by price bands
+based on actual result counts.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+from dataclasses import dataclass, replace
+from typing import Any
+
+import aiohttp
+
+from config.scraper_config import ScraperConfig
+from models.listing import ListingType, QueryParameters
+from rec.districts import get_districts
+
+logger = logging.getLogger("uvicorn.error")
+
+
+@dataclass
+class SubQuery:
+    """One district / bedroom-range / price-band slice of a larger search.
+
+    Attributes:
+        district: District name (a key of the districts mapping).
+        min_bedrooms: Lower bound of the bedroom filter.
+        max_bedrooms: Upper bound of the bedroom filter.
+        min_price: Lower bound of the price band.
+        max_price: Upper bound of the price band.
+        estimated_results: Result count from probing (None if not probed).
+    """
+
+    district: str
+    min_bedrooms: int
+    max_bedrooms: int
+    min_price: int
+    max_price: int
+    estimated_results: int | None = None
+
+    @property
+    def price_range(self) -> int:
+        """Return the width of the price band (max_price - min_price)."""
+        return self.max_price - self.min_price
+
+
+class QuerySplitter:
+    """Splits large queries into smaller subqueries to avoid result caps.
+
+    Uses adaptive binary search on price ranges to find optimal subdivisions
+    that keep each subquery under the result threshold.
+    """
+
+    def __init__(self, config: ScraperConfig | None = None) -> None:
+        """Initialize the splitter with configuration.
+
+        Args:
+            config: Scraper configuration. Loads from environment if not provided.
+        """
+        self.config = config or ScraperConfig.from_env()
+
+    def create_initial_subqueries(
+        self,
+        parameters: QueryParameters,
+        districts: dict[str, str],
+    ) -> list[SubQuery]:
+        """Create initial subqueries by splitting on district and bedrooms.
+
+        This creates the initial split before probing for result counts.
+        Each bedroom count gets its own subquery to enable finer-grained splitting.
+
+        Args:
+            parameters: Supplies the inclusive bedroom range and the full price band.
+            districts: District name to location ID; only the names (keys) are used.
+
+        Returns:
+            One SubQuery per (district, bedroom count); estimated_results is unset.
+        """
+        subqueries: list[SubQuery] = []
+
+        for district in districts.keys():
+            for num_bedrooms in range(
+                parameters.min_bedrooms, parameters.max_bedrooms + 1
+            ):
+                subqueries.append(
+                    SubQuery(
+                        district=district,
+                        min_bedrooms=num_bedrooms,
+                        max_bedrooms=num_bedrooms,
+                        min_price=parameters.min_price,
+                        max_price=parameters.max_price,
+                    )
+                )
+
+        return subqueries
+
+    async def probe_result_count(
+        self,
+        subquery: SubQuery,
+        session: aiohttp.ClientSession,
+        parameters: QueryParameters,
+    ) -> int:
+        """Probe the API to get the total result count for a subquery.
+
+        Delegates to rec.query.probe_query and reads totalAvailableResults.
+
+        Args:
+            subquery: Supplies the district, bedroom and price bounds.
+            session: aiohttp session passed through to probe_query.
+            parameters: Supplies listing type, radius, age and furnish filters.
+
+        Returns:
+            The reported count; 0 if the field is missing or the probe raises.
+        """
+        from rec.query import probe_query  # resolved at call time, not at import
+
+        try:
+            result = await probe_query(
+                session=session,
+                channel=parameters.listing_type,
+                min_bedrooms=subquery.min_bedrooms,
+                max_bedrooms=subquery.max_bedrooms,
+                radius=parameters.radius,
+                min_price=subquery.min_price,
+                max_price=subquery.max_price,
+                district=subquery.district,
+                max_days_since_added=parameters.max_days_since_added,
+                furnish_types=parameters.furnish_types or [],
+            )
+            return result.get("totalAvailableResults", 0)
+        except Exception as e:
+            logger.warning(f"Failed to probe subquery {subquery}: {e}")
+            return 0  # NOTE(review): a failed probe looks the same as an empty band
+
+    def split_by_price(self, subquery: SubQuery) -> list[SubQuery]:
+        """Split a subquery into two by halving the price range.
+
+        Args:
+            subquery: The subquery to split; it is copied, not modified.
+
+        Returns:
+            Two subqueries, [min_price, mid] and [mid, max_price], both unprobed.
+        """
+        mid_price = (subquery.min_price + subquery.max_price) // 2  # floor midpoint
+
+        return [
+            replace(
+                subquery,
+                max_price=mid_price,
+                estimated_results=None,
+            ),
+            replace(
+                subquery,
+                min_price=mid_price,
+                estimated_results=None,
+            ),
+        ]
+
+    async def adaptive_split(
+        self,
+        subquery: SubQuery,
+        session: aiohttp.ClientSession,
+        parameters: QueryParameters,
+        semaphore: asyncio.Semaphore,
+    ) -> list[SubQuery]:
+        """Recursively halve a subquery's price band until parts fit the threshold.
+
+        Bands of width <= max(min_price_band, 1) are returned without splitting.
+
+        Args:
+            subquery: The subquery to split.
+            session: aiohttp session for making requests.
+            parameters: Original query parameters.
+            semaphore: Bounds how many probe requests run at once.
+
+        Returns:
+            Subqueries at or under the split threshold, or at the minimum width.
+        """
+        # A band narrower than 2 cannot shrink when halved, so floor the limit at 1
+        if subquery.price_range <= max(self.config.min_price_band, 1):
+            logger.warning(
+                f"Cannot split further, price band at minimum: {subquery}"
+            )
+            return [subquery]
+
+        # Split into two halves
+        halves = self.split_by_price(subquery)
+        result: list[SubQuery] = []
+
+        for half in halves:
+            async with semaphore:
+                await asyncio.sleep(self.config.request_delay_ms / 1000)
+                count = await self.probe_result_count(half, session, parameters)
+
+            half = replace(half, estimated_results=count)
+
+            if count > self.config.split_threshold:
+                # Need to split further
+                result.extend(
+                    await self.adaptive_split(
+                        half, session, parameters, semaphore
+                    )
+                )
+            else:
+                result.append(half)
+
+        return result
+
+    async def split(
+        self,
+        parameters: QueryParameters,
+        session: aiohttp.ClientSession,
+        on_progress: Any = None,
+    ) -> list[SubQuery]:
+        """Split query parameters into optimized subqueries.
+
+        Performs the full splitting algorithm:
+        1. Create initial splits by district and bedroom count
+        2. Probe each to get result counts
+        3. Adaptively split any that exceed the threshold
+
+        Args:
+            parameters: Original query parameters to split.
+            session: aiohttp session for making requests.
+            on_progress: Optional callable invoked with phase= and message= keywords.
+
+        Returns:
+            Flattened list of refined SubQuery objects with estimated_results set.
+        """
+        # Restrict to the requested districts; an empty or None filter means all
+        if parameters.district_names:
+            districts = {
+                district: locid
+                for district, locid in get_districts().items()
+                if district in parameters.district_names
+            }
+        else:
+            districts = get_districts()
+
+        # Phase 1: Create initial subqueries
+        initial_subqueries = self.create_initial_subqueries(parameters, districts)
+        logger.info(f"Created {len(initial_subqueries)} initial subqueries")
+
+        if on_progress:
+            on_progress(
+                phase="splitting",
+                message=f"Created {len(initial_subqueries)} initial subqueries",
+            )
+
+        # Phase 2: Probe and adaptively split
+        semaphore = asyncio.Semaphore(self.config.max_concurrent_requests)
+        refined_subqueries: list[SubQuery] = []
+
+        # Probe every initial subquery concurrently, bounded by the semaphore
+        async def probe_and_split(sq: SubQuery) -> list[SubQuery]:
+            async with semaphore:
+                await asyncio.sleep(self.config.request_delay_ms / 1000)
+                count = await self.probe_result_count(sq, session, parameters)
+
+            sq = replace(sq, estimated_results=count)
+
+            if count > self.config.split_threshold:
+                logger.info(
+                    f"Subquery {sq.district}/{sq.min_bedrooms}BR "
+                    f"has {count} results, splitting..."
+                )
+                return await self.adaptive_split(
+                    sq, session, parameters, semaphore
+                )
+            return [sq]
+
+        tasks = [probe_and_split(sq) for sq in initial_subqueries]
+        results = await asyncio.gather(*tasks)
+
+        for subquery_list in results:
+            refined_subqueries.extend(subquery_list)
+
+        logger.info(
+            f"Refined to {len(refined_subqueries)} subqueries after splitting"
+        )
+
+        if on_progress:
+            on_progress(
+                phase="splitting_complete",
+                message=f"Refined to {len(refined_subqueries)} subqueries",
+            )
+
+        return refined_subqueries
+
+    def calculate_total_estimated_results(
+        self, subqueries: list[SubQuery]
+    ) -> int:
+        """Calculate total estimated results across all subqueries.
+
+        Args:
+            subqueries: Subqueries to total; unprobed ones (None) count as 0.
+
+        Returns:
+            Sum of estimated_results over the given subqueries.
+        """
+        return sum(sq.estimated_results or 0 for sq in subqueries)
diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py
index f86f89e..1fb3041 100644
--- a/crawler/tasks/listing_tasks.py
+++ b/crawler/tasks/listing_tasks.py
@@ -1,18 +1,17 @@
 import asyncio
-import itertools
 import logging
 from typing import Any
 from celery import Task
 from celery.schedules import crontab
 from celery_app import app
 from config.schedule_config import SchedulesConfig
+from config.scraper_config import ScraperConfig
 from listing_processor import ListingProcessor
 from models.listing import Listing, QueryParameters
-from rec.districts import get_districts
-from rec.query import listing_query
+from rec.query import create_session, listing_query
 from repositories.listing_repository import ListingRepository
 from database import engine
-from services import image_fetcher, floorplan_detector
+from services.query_splitter import QuerySplitter, SubQuery
 from utils.redis_lock import redis_lock
 
 logger = logging.getLogger("uvicorn.error")
@@ -134,106 +133,138 @@ async def get_ids_to_process(
     repository: ListingRepository,
     task: Task,
 ) -> set[int]:
-    semaphore = asyncio.Semaphore(5)  # if too high, rightmove drops connections
-    districts = await get_valid_districts_to_scrape(parameters.district_names)
-    task.update_state(state="Fetching listings to scrape", meta={"progress": 0})
-    json_responses: list[list[dict[str, Any]]] = await asyncio.gather(
-        *[
-            _fetch_listings_with_semaphore(
-                task=task, semaphore=semaphore, parameters=parameters, district=district
-            )
-            for district in districts.keys()
-        ],
-    )
-    json_responses_flat = list(itertools.chain.from_iterable(json_responses))
-    logger.debug(f"Total listings fetched {len(json_responses_flat)}")
+    """Fetch all listing IDs using intelligent query splitting.
 
-    identifiers: set[int] = set()
-    for response_json in json_responses_flat:
-        if response_json == {}:
-            continue
-        if response_json["totalAvailableResults"] == 0:
-            continue
-        for property in response_json["properties"]:
-            identifier = property["identifier"]
-            identifiers.add(identifier)
+    Uses the QuerySplitter to adaptively split large queries and maximize
+    data extraction while respecting Rightmove's result caps.
 
-    # if listing is already in db, do not fetch details again
-    all_listing_ids = {l.id for l in await repository.get_listings()}
-    new_ids = identifiers - all_listing_ids
-    return new_ids
+    Args:
+        parameters: Query parameters for the search.
+        repository: Repository for checking existing listings.
+        task: Celery task for progress updates.
 
+    Returns:
+        Set of new listing IDs that need to be processed.
+    """
+    config = ScraperConfig.from_env()
+    splitter = QuerySplitter(config)
 
-async def get_valid_districts_to_scrape(
-    district_names: set[str] | None,
-) -> dict[str, str]:
-    if district_names:
-        districts = {
-            district: locid
-            for district, locid in get_districts().items()
-            if district in district_names
-        }
-    else:
-        districts = get_districts()
-    return districts
+    def on_progress(phase: str, message: str) -> None:
+        task.update_state(state=message, meta={"phase": phase})
 
-
-async def _fetch_listings_with_semaphore(
-    *,
-    task: Task,
-    semaphore: asyncio.Semaphore,
-    parameters: QueryParameters,
-    district: str,
-) -> list[dict[str, Any]]:
-    result = []
-    # split the price in N bands to avoid the 1.5k capping by rightmove
-    # basically instead of 1 query with price between 1k and 5k that is capped at 1500 results
-    # we do 10 queries each with an increment in price range so we send more queries but each
-    # has a smaller chance of returning more than 1.5k results
-
-    number_of_steps = 10
-    price_step = parameters.max_price // number_of_steps
-
-    for step in range(number_of_steps):
+    async with create_session(config) as session:
+        # Phase 1 & 2: Split and probe queries
         task.update_state(
-            state=f"Fetching listings ({step} out of {number_of_steps})",
-            meta={"progress": step / number_of_steps},
+            state="Analyzing query and splitting by price bands...",
+            meta={"phase": "splitting", "progress": 0},
         )
-        min_price = step * price_step
-        max_price = (step + 1) * price_step
-        logger.debug(
-            f"Step {step} of {number_of_steps} with {min_price=} and {max_price=}"
+        subqueries = await splitter.split(parameters, session, on_progress)
+
+        total_estimated = splitter.calculate_total_estimated_results(subqueries)
+        logger.info(
+            f"Split into {len(subqueries)} subqueries, "
+            f"estimated {total_estimated} total results"
         )
 
-        for num_bedrooms in range(parameters.min_bedrooms, parameters.max_bedrooms + 1):
-            for page_id in range(
-                1,
-                3,  # seems like all searches stop at 1500 entries (page_id * page_size)
-            ):
-                logger.debug(f"Processing {page_id=} for {district=}")
+        # Phase 3: Fetch all pages for each subquery
+        task.update_state(
+            state=f"Fetching listings from {len(subqueries)} subqueries...",
+            meta={
+                "phase": "fetching",
+                "subqueries": len(subqueries),
+                "estimated_results": total_estimated,
+            },
+        )
 
+        semaphore = asyncio.Semaphore(config.max_concurrent_requests)
+        identifiers: set[int] = set()
+
+        async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
+            """Fetch all pages for a single subquery."""
+            results: list[dict[str, Any]] = []
+
+            # Calculate how many pages we need based on estimated results
+            estimated = sq.estimated_results or 0
+            if estimated == 0:
+                return results
+
+            # Fetch pages up to max_pages_per_query or until no more results
+            page_size = parameters.page_size
+            max_pages = min(
+                config.max_pages_per_query,
+                (estimated // page_size) + 1,
+            )
+
+            for page_id in range(1, max_pages + 1):
                 async with semaphore:
+                    await asyncio.sleep(config.request_delay_ms / 1000)
                     try:
-                        listing_query_result = await listing_query(
+                        result = await listing_query(
                             page=page_id,
                             channel=parameters.listing_type,
-                            # min_bedrooms=parameters.min_bedrooms,
-                            # max_bedrooms=parameters.max_bedrooms,
-                            min_bedrooms=num_bedrooms,
-                            max_bedrooms=num_bedrooms,
+                            min_bedrooms=sq.min_bedrooms,
+                            max_bedrooms=sq.max_bedrooms,
                             radius=parameters.radius,
-                            min_price=min_price,
-                            max_price=max_price,
-                            district=district,
-                            page_size=parameters.page_size,
+                            min_price=sq.min_price,
+                            max_price=sq.max_price,
+                            district=sq.district,
+                            page_size=page_size,
                             max_days_since_added=parameters.max_days_since_added,
                             furnish_types=parameters.furnish_types or [],
+                            session=session,
                         )
+                        results.append(result)
+
+                        # Check if we've received all results
+                        properties = result.get("properties", [])
+                        if len(properties) < page_size:
+                            # No more results on next page
+                            break
 
                     except Exception as e:
-                        if "GENERIC_ERROR" in str(e):  # Too big page id
-                            logger.debug(f"Max page id for {district=}: {page_id-1}")
+                        if "GENERIC_ERROR" in str(e):
+                            # Reached end of results
+                            logger.debug(
+                                f"Max page for {sq.district}: {page_id - 1}"
+                            )
                             break
-                        raise e
-                result.append(listing_query_result)
-    return result
+                        logger.warning(
+                            f"Error fetching page {page_id} for {sq.district}: {e}"
+                        )
+                        break
+
+            return results
+
+        # Fetch all subqueries concurrently
+        all_results = await asyncio.gather(
+            *[fetch_subquery(sq) for sq in subqueries]
+        )
+
+        # Extract identifiers from all results
+        for subquery_results in all_results:
+            for response_json in subquery_results:
+                if not response_json:
+                    continue
+                if response_json.get("totalAvailableResults", 0) == 0:
+                    continue
+                for property_data in response_json.get("properties", []):
+                    identifier = property_data.get("identifier")
+                    if identifier:
+                        identifiers.add(identifier)
+
+    logger.info(f"Found {len(identifiers)} unique listings")
+
+    # Filter out listings already in the database
+    all_listing_ids = {l.id for l in await repository.get_listings()}
+    new_ids = identifiers - all_listing_ids
+
+    task.update_state(
+        state=f"Found {len(new_ids)} new listings to process",
+        meta={
+            "phase": "filtering",
+            "total_found": len(identifiers),
+            "new_listings": len(new_ids),
+        },
+    )
+
+    return new_ids
diff --git a/crawler/tests/unit/test_query_splitter.py b/crawler/tests/unit/test_query_splitter.py
new file mode 100644
index 0000000..467f234
--- /dev/null
+++ b/crawler/tests/unit/test_query_splitter.py
@@ -0,0 +1,374 @@
+"""Unit tests for QuerySplitter service."""
+import pytest
+from unittest.mock import AsyncMock, patch
+
+from config.scraper_config import ScraperConfig
+from models.listing import ListingType, QueryParameters
+from services.query_splitter import QuerySplitter, SubQuery
+
+
+class TestScraperConfig:
+    """Tests for the ScraperConfig dataclass."""
+
+    def test_default_values(self) -> None:
+        """Test that default values are set correctly."""
+        config = ScraperConfig()
+        assert config.max_concurrent_requests == 5
+        assert config.request_delay_ms == 100
+        assert config.result_cap == 1500
+        assert config.split_threshold == 1200
+        assert config.min_price_band == 100
+        assert config.max_pages_per_query == 60
+        assert config.proxy_url is None
+
+    def test_from_env(self) -> None:
+        """Test loading configuration from environment variables."""
+        with patch.dict(
+            "os.environ",
+            {
+                "RIGHTMOVE_MAX_CONCURRENT": "10",
+                "RIGHTMOVE_REQUEST_DELAY_MS": "200",
+                "RIGHTMOVE_SPLIT_THRESHOLD": "1000",
+                "RIGHTMOVE_MIN_PRICE_BAND": "50",
+                "RIGHTMOVE_MAX_PAGES": "30",
+                "RIGHTMOVE_PROXY_URL": "socks5://localhost:9050",
+            },
+        ):
+            config = ScraperConfig.from_env()
+            assert config.max_concurrent_requests == 10
+            assert config.request_delay_ms == 200
+            assert config.split_threshold == 1000
+            assert config.min_price_band == 50
+            assert config.max_pages_per_query == 30
+            assert config.proxy_url == "socks5://localhost:9050"
+
+    def test_from_env_empty_proxy(self) -> None:
+        """Test that empty proxy URL is converted to None."""
+        with patch.dict(
+            "os.environ",
+            {
+                "RIGHTMOVE_PROXY_URL": "",
+            },
+            clear=False,
+        ):
+            config = ScraperConfig.from_env()
+            assert config.proxy_url is None
+
+
+class TestSubQuery:
+    """Tests for the SubQuery dataclass."""
+
+    def test_price_range_calculation(self) -> None:
+        """Test that price_range is calculated correctly."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=2000,
+        )
+        assert sq.price_range == 1000
+
+
+class TestQuerySplitter:
+    """Tests for the QuerySplitter class."""
+
+    @pytest.fixture
+    def config(self) -> ScraperConfig:
+        """Create a test configuration."""
+        return ScraperConfig(
+            max_concurrent_requests=5,
+            request_delay_ms=10,  # Faster for testing
+            result_cap=1500,
+            split_threshold=1200,
+            min_price_band=100,
+            max_pages_per_query=60,
+            proxy_url=None,
+        )
+
+    @pytest.fixture
+    def splitter(self, config: ScraperConfig) -> QuerySplitter:
+        """Create a QuerySplitter instance."""
+        return QuerySplitter(config)
+
+    @pytest.fixture
+    def parameters(self) -> QueryParameters:
+        """Create test query parameters."""
+        return QueryParameters(
+            listing_type=ListingType.RENT,
+            min_bedrooms=2,
+            max_bedrooms=3,
+            min_price=1000,
+            max_price=5000,
+            district_names={"Kings Cross", "Angel"},
+        )
+
+    def test_create_initial_subqueries(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Test that initial subqueries are created correctly."""
+        districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}
+
+        subqueries = splitter.create_initial_subqueries(parameters, districts)
+
+        # 2 districts × 2 bedroom counts (2,3) = 4 subqueries
+        assert len(subqueries) == 4
+
+        # Check first subquery
+        assert subqueries[0].district == "Kings Cross"
+        assert subqueries[0].min_bedrooms == 2
+        assert subqueries[0].max_bedrooms == 2
+        assert subqueries[0].min_price == 1000
+        assert subqueries[0].max_price == 5000
+
+    def test_split_by_price(self, splitter: QuerySplitter) -> None:
+        """Test that price splitting works correctly."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=5000,
+        )
+
+        halves = splitter.split_by_price(sq)
+
+        assert len(halves) == 2
+        assert halves[0].min_price == 1000
+        assert halves[0].max_price == 3000  # midpoint
+        assert halves[1].min_price == 3000
+        assert halves[1].max_price == 5000
+
+        # Both should have same bedroom range and district
+        for half in halves:
+            assert half.district == "Kings Cross"
+            assert half.min_bedrooms == 2
+            assert half.max_bedrooms == 2
+
+    @pytest.mark.asyncio
+    async def test_probe_result_count(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Test probing API for result count."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=5000,
+        )
+
+        mock_session = AsyncMock()
+
+        # probe_query is imported inside probe_result_count, so patch it at its source
+        with patch("rec.query.probe_query") as mock_probe:
+            mock_probe.return_value = {"totalAvailableResults": 800}
+
+            count = await splitter.probe_result_count(sq, mock_session, parameters)
+
+            assert count == 800
+            mock_probe.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_probe_result_count_handles_error(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Test that probe_result_count returns 0 when probe_query raises."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=5000,
+        )
+
+        mock_session = AsyncMock()
+
+        with patch("rec.query.probe_query") as mock_probe:  # patched at its source
+            mock_probe.side_effect = Exception("API error")
+
+            count = await splitter.probe_result_count(sq, mock_session, parameters)
+
+            # Should return 0 on error
+            assert count == 0
+
+    @pytest.mark.asyncio
+    async def test_adaptive_split_no_split_needed(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Test adaptive split when both halves are below the threshold."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=2000,
+        )
+
+        mock_session = AsyncMock()
+        mock_semaphore = AsyncMock()
+
+        with patch("rec.query.probe_query") as mock_probe:  # patched at its source
+            # First half has 600 results, second half has 500
+            mock_probe.side_effect = [
+                {"totalAvailableResults": 600},
+                {"totalAvailableResults": 500},
+            ]
+
+            result = await splitter.adaptive_split(
+                sq, mock_session, parameters, mock_semaphore
+            )
+
+            # Both halves are under threshold (1200), so we get 2 subqueries back
+            assert len(result) == 2
+            assert result[0].estimated_results == 600
+            assert result[1].estimated_results == 500
+
+    @pytest.mark.asyncio
+    async def test_adaptive_split_recursive_splitting(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Test adaptive split performs recursive splitting when needed."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=5000,
+        )
+
+        mock_session = AsyncMock()
+        mock_semaphore = AsyncMock()
+
+        with patch("rec.query.probe_query") as mock_probe:  # patched at its source
+            # First split: 1000-3000 has 1300 (over threshold), 3000-5000 has 800
+            # Second split of 1000-3000: 1000-2000 has 700, 2000-3000 has 600
+            mock_probe.side_effect = [
+                {"totalAvailableResults": 1300},  # First half - needs more splitting
+                {"totalAvailableResults": 800},  # Second half - OK
+                {"totalAvailableResults": 700},  # First quarter - OK
+                {"totalAvailableResults": 600},  # Second quarter - OK
+            ]
+
+            result = await splitter.adaptive_split(
+                sq, mock_session, parameters, mock_semaphore
+            )
+
+            # Should get 3 subqueries: [1000-2000 (700), 2000-3000 (600), 3000-5000 (800)]
+            assert len(result) == 3
+
+    @pytest.mark.asyncio
+    async def test_adaptive_split_respects_min_price_band(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Test that adaptive split stops at min_price_band."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=1050,  # Only 50 range, below min_price_band of 100
+            estimated_results=1500,  # Over threshold but can't split
+        )
+
+        mock_session = AsyncMock()
+        mock_semaphore = AsyncMock()
+
+        result = await splitter.adaptive_split(
+            sq, mock_session, parameters, mock_semaphore
+        )
+
+        # Can't split below min_price_band, should return original
+        assert len(result) == 1
+        assert result[0].min_price == 1000
+        assert result[0].max_price == 1050
+
+    def test_calculate_total_estimated_results(
+        self, splitter: QuerySplitter
+    ) -> None:
+        """Test calculation of total estimated results."""
+        subqueries = [
+            SubQuery(
+                district="Kings Cross",
+                min_bedrooms=2,
+                max_bedrooms=2,
+                min_price=1000,
+                max_price=2000,
+                estimated_results=500,
+            ),
+            SubQuery(
+                district="Kings Cross",
+                min_bedrooms=3,
+                max_bedrooms=3,
+                min_price=1000,
+                max_price=2000,
+                estimated_results=300,
+            ),
+            SubQuery(
+                district="Angel",
+                min_bedrooms=2,
+                max_bedrooms=2,
+                min_price=1000,
+                max_price=2000,
+                estimated_results=None,  # Not probed
+            ),
+        ]
+
+        total = splitter.calculate_total_estimated_results(subqueries)
+        assert total == 800  # 500 + 300 + 0
+
+    @pytest.mark.asyncio
+    async def test_split_integration(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """Integration test for the full split workflow."""
+        mock_session = AsyncMock()
+        mock_districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}
+
+        with patch("services.query_splitter.get_districts", return_value=mock_districts):
+            with patch("rec.query.probe_query") as mock_probe:  # imported lazily
+                # Mock probe results for each initial subquery
+                # 2 districts × 2 bedroom counts = 4 initial subqueries
+                mock_probe.side_effect = [
+                    {"totalAvailableResults": 500},  # KC 2BR - OK
+                    {"totalAvailableResults": 1300},  # KC 3BR - needs split
+                    {"totalAvailableResults": 600},  # Angel 2BR - OK
+                    {"totalAvailableResults": 800},  # Angel 3BR - OK
+                    # Split KC 3BR
+                    {"totalAvailableResults": 700},  # KC 3BR first half
+                    {"totalAvailableResults": 600},  # KC 3BR second half
+                ]
+
+                result = await splitter.split(parameters, mock_session)
+
+                # Should have 5 subqueries total:
+                # KC 2BR (500), KC 3BR split into 2 (700+600), Angel 2BR (600), Angel 3BR (800)
+                assert len(result) == 5
+
+                # Verify total estimated results
+                total = splitter.calculate_total_estimated_results(result)
+                assert total == 3200  # 500 + 700 + 600 + 600 + 800
+
+    @pytest.mark.asyncio
+    async def test_split_with_on_progress_callback(
+        self, splitter: QuerySplitter, parameters: QueryParameters
+    ) -> None:
+        """split() reports the "splitting" and "splitting_complete" phases via on_progress."""
+        mock_session = AsyncMock()
+        mock_districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}
+        progress_calls: list[tuple[str, str]] = []
+
+        def on_progress(phase: str, message: str) -> None:
+            progress_calls.append((phase, message))
+
+        with patch("services.query_splitter.get_districts", return_value=mock_districts):
+            with patch("services.query_splitter.probe_query") as mock_probe:
+                mock_probe.return_value = {"totalAvailableResults": 500}
+
+                await splitter.split(parameters, mock_session, on_progress)
+
+                # Should have received at least 2 progress updates (one per asserted phase)
+                assert len(progress_calls) >= 2
+                phases = [call[0] for call in progress_calls]
+                assert "splitting" in phases
+                assert "splitting_complete" in phases

From f880664a98a0b1b0096d8f59d3acbfb8d4ba49f5 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Mon, 2 Feb 2026 22:50:19 +0000
Subject: [PATCH 3/5] Add throttling detection and circuit breaker for
 Rightmove scraper

---
 crawler/.env.sample                           |   6 +
 crawler/config/scraper_config.py              |  24 ++
 crawler/rec/circuit_breaker.py                | 137 +++++++
 crawler/rec/exceptions.py                     |  74 ++++
 crawler/rec/query.py                          | 239 +++++++++++--
 crawler/rec/throttle_detector.py              | 232 ++++++++++++
 crawler/services/listing_fetcher.py           | 144 ++++----
 crawler/services/query_splitter.py            |  13 +
 .../integration/test_throttle_integration.py  | 311 ++++++++++++++++
 crawler/tests/unit/test_throttle_detection.py | 334 ++++++++++++++++++
 10 files changed, 1428 insertions(+), 86 deletions(-)
 create mode 100644 crawler/rec/circuit_breaker.py
 create mode 100644 crawler/rec/exceptions.py
 create mode 100644 crawler/rec/throttle_detector.py
 create mode 100644 crawler/tests/integration/test_throttle_integration.py
 create mode 100644 crawler/tests/unit/test_throttle_detection.py

diff --git a/crawler/.env.sample b/crawler/.env.sample
index aa1c93d..b709cde 100644
--- a/crawler/.env.sample
+++ b/crawler/.env.sample
@@ -16,6 +16,12 @@ RIGHTMOVE_MIN_PRICE_BAND=100      # Minimum price band width (won't split below
 RIGHTMOVE_MAX_PAGES=60            # Max pages per subquery (60 * 25 = 1500 max results)
 RIGHTMOVE_PROXY_URL=              # Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor)
 
+# Throttling detection and circuit breaker
+RIGHTMOVE_SLOW_RESPONSE_THRESHOLD=10.0  # Response time threshold in seconds
+RIGHTMOVE_ENABLE_CIRCUIT_BREAKER=true   # Enable circuit breaker protection
+RIGHTMOVE_CIRCUIT_BREAKER_FAILURES=5    # Consecutive failures to open circuit
+RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT=60.0  # Seconds to wait before recovery attempt
+
 # Periodic scraping schedules (JSON array)
 # Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types
 # Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday)
diff --git a/crawler/config/scraper_config.py b/crawler/config/scraper_config.py
index e84c1d5..860d343 100644
--- a/crawler/config/scraper_config.py
+++ b/crawler/config/scraper_config.py
@@ -18,6 +18,10 @@ class ScraperConfig:
         min_price_band: Minimum width of a price band (won't split below this).
         max_pages_per_query: Maximum pages to fetch per subquery (60 * 25 = 1500).
         proxy_url: Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor).
+        slow_response_threshold: Response time threshold in seconds for throttle detection.
+        enable_circuit_breaker: Whether to enable circuit breaker protection.
+        circuit_breaker_failure_threshold: Number of consecutive failures to open circuit.
+        circuit_breaker_recovery_timeout: Seconds to wait before testing recovery.
     """
 
     max_concurrent_requests: int = 5
@@ -27,6 +31,10 @@ class ScraperConfig:
     min_price_band: int = 100  # Minimum band width in currency units
     max_pages_per_query: int = 60  # 60 * 25 = 1500 results max
     proxy_url: str | None = None
+    slow_response_threshold: float = 10.0  # seconds
+    enable_circuit_breaker: bool = True
+    circuit_breaker_failure_threshold: int = 5
+    circuit_breaker_recovery_timeout: float = 60.0
 
     @classmethod
     def from_env(cls) -> Self:
@@ -40,6 +48,10 @@ class ScraperConfig:
             RIGHTMOVE_MIN_PRICE_BAND: Minimum price band width (default: 100)
             RIGHTMOVE_MAX_PAGES: Max pages per query (default: 60)
             RIGHTMOVE_PROXY_URL: SOCKS proxy URL (default: None)
+            RIGHTMOVE_SLOW_RESPONSE_THRESHOLD: Slow response threshold in seconds (default: 10.0)
+            RIGHTMOVE_ENABLE_CIRCUIT_BREAKER: Enable circuit breaker (default: True)
+            RIGHTMOVE_CIRCUIT_BREAKER_FAILURES: Failures to open circuit (default: 5)
+            RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT: Recovery timeout in seconds (default: 60.0)
 
         Returns:
             ScraperConfig instance with values from environment or defaults.
@@ -62,4 +74,16 @@ class ScraperConfig:
                 os.environ.get("RIGHTMOVE_MAX_PAGES", "60")
             ),
             proxy_url=os.environ.get("RIGHTMOVE_PROXY_URL") or None,
+            slow_response_threshold=float(
+                os.environ.get("RIGHTMOVE_SLOW_RESPONSE_THRESHOLD", "10.0")
+            ),
+            enable_circuit_breaker=os.environ.get(
+                "RIGHTMOVE_ENABLE_CIRCUIT_BREAKER", "true"
+            ).lower() in ("true", "1", "yes"),
+            circuit_breaker_failure_threshold=int(
+                os.environ.get("RIGHTMOVE_CIRCUIT_BREAKER_FAILURES", "5")
+            ),
+            circuit_breaker_recovery_timeout=float(
+                os.environ.get("RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT", "60.0")
+            ),
         )
diff --git a/crawler/rec/circuit_breaker.py b/crawler/rec/circuit_breaker.py
new file mode 100644
index 0000000..27bf12f
--- /dev/null
+++ b/crawler/rec/circuit_breaker.py
@@ -0,0 +1,137 @@
+"""Circuit breaker pattern for protecting against cascading failures."""
+from __future__ import annotations
+
+import enum
+import logging
+import time
+from dataclasses import dataclass
+
+from rec.exceptions import CircuitBreakerOpenError
+
+logger = logging.getLogger("uvicorn.error")
+
+
+class CircuitState(enum.Enum):
+    """States of the CircuitBreaker state machine."""
+
+    CLOSED = "closed"  # Normal operation: requests pass, failures are counted
+    OPEN = "open"  # Too many failures, blocking requests
+    HALF_OPEN = "half_open"  # Recovery timeout elapsed; requests pass again to probe the service
+
+
+@dataclass
+class CircuitBreaker:
+    """Circuit breaker for protecting against cascading failures.
+
+    Implements the circuit breaker pattern:
+    - CLOSED: Requests pass through normally, failures are counted
+    - OPEN: After N consecutive failures, circuit opens and blocks all requests
+    - HALF_OPEN: After recovery timeout, requests pass again; a success closes, a failure reopens
+
+    Attributes:
+        failure_threshold: Number of consecutive failures before opening.
+        recovery_timeout: Seconds to wait before attempting half-open state.
+        state: Current circuit state.
+        failure_count: Count of consecutive failures (reset to 0 by any recorded success).
+        last_failure_time: time.time() of the last recorded failure; the recovery timeout counts from it.
+        last_state_change: time.time() of the last state change; written but never read by this class.
+    """
+
+    failure_threshold: int
+    recovery_timeout: float
+    state: CircuitState = CircuitState.CLOSED
+    failure_count: int = 0
+    last_failure_time: float = 0.0
+    last_state_change: float = 0.0
+
+    def __post_init__(self) -> None:
+        """Stamp last_state_change with the construction time, replacing any value passed in."""
+        self.last_state_change = time.time()
+
+    def call(self) -> None:
+        """Gate a request: raise while OPEN, otherwise return so the caller may proceed.
+
+        Raises:
+            CircuitBreakerOpenError: If circuit is open and blocking requests.
+        """
+        current_time = time.time()
+
+        if self.state == CircuitState.OPEN:
+            # Timeout is measured from the last recorded failure, not from when the circuit opened
+            if current_time - self.last_failure_time >= self.recovery_timeout:
+                self._transition_to_half_open()
+            else:
+                raise CircuitBreakerOpenError(
+                    f"Circuit breaker is open. "
+                    f"Waiting {self.recovery_timeout - (current_time - self.last_failure_time):.1f}s "
+                    f"before retry."
+                )
+
+        # Falling through allows the request (CLOSED or HALF_OPEN); HALF_OPEN admits every caller
+
+    def record_success(self) -> None:
+        """Record a successful request; closes the circuit when it was HALF_OPEN."""
+        if self.state == CircuitState.HALF_OPEN:
+            # Service has recovered, close the circuit
+            self._transition_to_closed()
+
+        # Reset failure count on success (in every state, including OPEN)
+        self.failure_count = 0
+
+    def record_failure(self) -> None:
+        """Record a failed request; bumps failure_count and last_failure_time in every state."""
+        self.failure_count += 1
+        self.last_failure_time = time.time()
+
+        if self.state == CircuitState.HALF_OPEN:
+            # Test request failed, reopen circuit
+            self._transition_to_open()
+        elif self.state == CircuitState.CLOSED:
+            # Check if we should open the circuit
+            if self.failure_count >= self.failure_threshold:
+                self._transition_to_open()
+
+    def _transition_to_open(self) -> None:
+        """Transition to OPEN state and log a warning with the current failure count."""
+        self.state = CircuitState.OPEN
+        self.last_state_change = time.time()
+        logger.warning(
+            f"Circuit breaker OPENED after {self.failure_count} consecutive failures. "
+            f"Will retry in {self.recovery_timeout}s"
+        )
+
+    def _transition_to_half_open(self) -> None:
+        """Transition to HALF_OPEN state (failure_count is left unchanged)."""
+        self.state = CircuitState.HALF_OPEN
+        self.last_state_change = time.time()
+        logger.info("Circuit breaker entering HALF_OPEN state, testing service recovery")
+
+    def _transition_to_closed(self) -> None:
+        """Transition to CLOSED state and clear the failure count."""
+        self.state = CircuitState.CLOSED
+        self.last_state_change = time.time()
+        self.failure_count = 0
+        logger.info("Circuit breaker CLOSED, service recovered")
+
+    def reset(self) -> None:
+        """Manually reset to CLOSED, clearing failure_count and last_failure_time."""
+        self.state = CircuitState.CLOSED
+        self.failure_count = 0
+        self.last_failure_time = 0.0
+        self.last_state_change = time.time()
+        logger.info("Circuit breaker manually reset to CLOSED state")
+
+    @property
+    def is_open(self) -> bool:
+        """Check if circuit is currently open."""
+        return self.state == CircuitState.OPEN
+
+    @property
+    def is_closed(self) -> bool:
+        """Check if circuit is currently closed."""
+        return self.state == CircuitState.CLOSED
+
+    @property
+    def is_half_open(self) -> bool:
+        """Check if circuit is currently half-open."""
+        return self.state == CircuitState.HALF_OPEN
diff --git a/crawler/rec/exceptions.py b/crawler/rec/exceptions.py
new file mode 100644
index 0000000..9d24e94
--- /dev/null
+++ b/crawler/rec/exceptions.py
@@ -0,0 +1,74 @@
+"""Custom exceptions for Rightmove API errors."""
+
+
+class RightmoveAPIError(Exception):
+    """Base exception for all Rightmove API errors."""
+
+    pass
+
+
+class ThrottlingError(RightmoveAPIError):
+    """Base exception for throttling-related errors.
+
+    Indicates that Rightmove is limiting our requests and we should back off.
+    """
+
+    pass
+
+
+class RateLimitError(ThrottlingError):
+    """HTTP 429 - Too Many Requests.
+
+    Rightmove is explicitly rate limiting our requests.
+    """
+
+    pass
+
+
+class ServiceUnavailableError(ThrottlingError):
+    """HTTP 503 - Service Unavailable.
+
+    Rightmove's service is temporarily unavailable, possibly due to overload.
+    """
+
+    pass
+
+
+class IPBlockedError(ThrottlingError):
+    """HTTP 403 - Forbidden (IP blocked).
+
+    Our IP may be blocked or blacklisted by Rightmove.
+    """
+
+    pass
+
+
+class SlowResponseError(ThrottlingError):
+    """Response time exceeded threshold.
+
+    API is responding very slowly, indicating potential throttling or overload.
+    """
+
+    pass
+
+
+class UnexpectedEmptyResponseError(RightmoveAPIError):
+    """No properties returned although totalAvailableResults > 0; not a ThrottlingError."""
+
+    pass
+
+
+class InvalidResponseError(RightmoveAPIError):
+    """Response body carries an error marker ("error" key or "GENERIC_ERROR"); not a ThrottlingError."""
+
+    pass
+
+
+class CircuitBreakerOpenError(RightmoveAPIError):
+    """Circuit breaker is open, requests are being blocked.
+
+    The circuit breaker has detected too many failures and is preventing
+    further requests to allow the service to recover.
+    """
+
+    pass
diff --git a/crawler/rec/query.py b/crawler/rec/query.py
index a2526a6..4de8a1a 100644
--- a/crawler/rec/query.py
+++ b/crawler/rec/query.py
@@ -1,4 +1,6 @@
 import enum
+import logging
+import time
 from typing import Any
 from contextlib import asynccontextmanager
 from collections.abc import AsyncIterator
@@ -6,9 +8,26 @@ from collections.abc import AsyncIterator
 import aiohttp
 from models.listing import FurnishType, ListingType
 from rec import districts
-from tenacity import retry, stop_after_attempt, wait_random
+from rec.exceptions import (
+    CircuitBreakerOpenError,
+    ThrottlingError,
+)
+from rec.throttle_detector import get_throttle_metrics, validate_response
+from rec.circuit_breaker import CircuitBreaker
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+    wait_random,
+)
 from config.scraper_config import ScraperConfig
 
+logger = logging.getLogger("uvicorn.error")
+
+# Global circuit breaker instance
+_circuit_breaker: CircuitBreaker | None = None
+
 
 DEFAULT_HEADERS = {
     "Host": "api.rightmove.co.uk",
@@ -65,20 +84,81 @@ async def create_session(
         await session.close()
 
 
-@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
+def get_circuit_breaker(config: ScraperConfig | None = None) -> CircuitBreaker | None:
+    """Return the module-wide circuit breaker, creating it on first use.
+
+    Args:
+        config: Scraper configuration; loaded from the environment when None.
+
+    Returns:
+        The shared CircuitBreaker, or None when config.enable_circuit_breaker is false.
+    """
+    global _circuit_breaker
+    if config is None:
+        config = ScraperConfig.from_env()
+
+    if not config.enable_circuit_breaker:
+        return None
+    # Thresholds are read only when the breaker is first created; later configs do not change it
+    if _circuit_breaker is None:
+        _circuit_breaker = CircuitBreaker(
+            failure_threshold=config.circuit_breaker_failure_threshold,
+            recovery_timeout=config.circuit_breaker_recovery_timeout,
+        )
+    return _circuit_breaker
+
+
+def reset_circuit_breaker() -> None:
+    """Reset the global circuit breaker to CLOSED; no-op if it was never created."""
+    global _circuit_breaker
+    if _circuit_breaker is not None:
+        _circuit_breaker.reset()
+
+
+def check_circuit_breaker(config: ScraperConfig | None = None) -> None:
+    """Gate a request on the global circuit breaker (no-op when the breaker is disabled).
+
+    Args:
+        config: Scraper configuration; loaded from the environment when None.
+
+    Raises:
+        CircuitBreakerOpenError: If the circuit is open and the recovery timeout has not elapsed.
+    """
+    cb = get_circuit_breaker(config)
+    if cb is not None:
+        cb.call()
+
+
+@retry(
+    retry=retry_if_exception_type(ThrottlingError),
+    wait=wait_exponential(multiplier=2, min=2, max=120),
+    stop=stop_after_attempt(5),
+)
 async def detail_query(
     detail_id: int,
     session: aiohttp.ClientSession | None = None,
+    config: ScraperConfig | None = None,
 ) -> dict[str, Any]:
     """Fetch detailed property information.
 
     Args:
         detail_id: The property identifier.
         session: Optional aiohttp session. Creates new one if not provided.
+        config: Scraper configuration. Loads from environment if not provided.
 
     Returns:
         Property details as a dictionary.
+
+    Raises:
+        CircuitBreakerOpenError: If the circuit breaker is open.
+        ThrottlingError: If the request is throttled.
     """
+    if config is None:
+        config = ScraperConfig.from_env()
+
+    check_circuit_breaker(config)
+    cb = get_circuit_breaker(config)
+
     params = {
         "apiApplication": "ANDROID",
         "appVersion": "3.70.0",
@@ -86,13 +166,38 @@ async def detail_query(
     url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
 
     async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
-        async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
-            if response.status != 200:
-                raise Exception(
-                    f"""id: {detail_id}. Status Code: {response.status}."""
-                    f"""Failed due to: {await response.text()}"""
+        start_time = time.time()
+        try:
+            async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
+                response_time = time.time() - start_time
+                body = await response.json() if response.status == 200 else None
+
+                # Validate response for throttling
+                validate_response(
+                    response,
+                    response_time,
+                    body,
+                    config.slow_response_threshold,
+                    expect_data=True,
                 )
-            return await response.json()
+
+                if response.status != 200:
+                    raise Exception(
+                        f"""id: {detail_id}. Status Code: {response.status}."""
+                        f"""Failed due to: {await response.text()}"""
+                    )
+
+                if cb is not None:
+                    cb.record_success()
+                return body  # type: ignore
+        except ThrottlingError:
+            if cb is not None:
+                cb.record_failure()
+            raise
+        except Exception as e:
+            if cb is not None:
+                cb.record_failure()
+            raise e
 
     if session:
         return await do_request(session)
@@ -101,7 +206,11 @@ async def detail_query(
             return await do_request(new_session)
 
 
-@retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
+@retry(
+    retry=retry_if_exception_type(ThrottlingError),
+    wait=wait_exponential(multiplier=2, min=2, max=120),
+    stop=stop_after_attempt(5),
+)
 async def listing_query(
     *,
     page: int,
@@ -118,6 +227,7 @@ async def listing_query(
     page_size: int = 25,
     furnish_types: list[FurnishType] = [],
     session: aiohttp.ClientSession | None = None,
+    config: ScraperConfig | None = None,
 ) -> dict[str, Any]:
     """Execute a listing search query.
 
@@ -136,10 +246,21 @@ async def listing_query(
         page_size: Number of results per page (default 25).
         furnish_types: List of furnish types to filter (RENT only).
         session: Optional aiohttp session. Creates new one if not provided.
+        config: Scraper configuration. Loads from environment if not provided.
 
     Returns:
         API response as a dictionary.
+
+    Raises:
+        CircuitBreakerOpenError: If the circuit breaker is open.
+        ThrottlingError: If the request is throttled.
     """
+    if config is None:
+        config = ScraperConfig.from_env()
+
+    check_circuit_breaker(config)
+    cb = get_circuit_breaker(config)
+
     params: dict[str, str] = {
         "locationIdentifier": districts.get_districts()[district],
         "channel": str(channel).upper(),
@@ -185,14 +306,39 @@ async def listing_query(
     }
 
     async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
-        async with s.get(
-            "https://api.rightmove.co.uk/api/property-listing",
-            params=params,
-            headers=request_headers,
-        ) as response:
-            if response.status != 200:
-                raise Exception(f"Failed due to: {await response.text()}")
-            return await response.json()
+        start_time = time.time()
+        try:
+            async with s.get(
+                "https://api.rightmove.co.uk/api/property-listing",
+                params=params,
+                headers=request_headers,
+            ) as response:
+                response_time = time.time() - start_time
+                body = await response.json() if response.status == 200 else None
+
+                # Validate response for throttling
+                validate_response(
+                    response,
+                    response_time,
+                    body,
+                    config.slow_response_threshold,
+                    expect_data=(page == 1),  # Only expect data on first page
+                )
+
+                if response.status != 200:
+                    raise Exception(f"Failed due to: {await response.text()}")
+
+                if cb is not None:
+                    cb.record_success()
+                return body  # type: ignore
+        except ThrottlingError:
+            if cb is not None:
+                cb.record_failure()
+            raise
+        except Exception as e:
+            if cb is not None:
+                cb.record_failure()
+            raise e
 
     if session:
         return await do_request(session)
@@ -201,7 +347,11 @@ async def listing_query(
             return await do_request(new_session)
 
 
-@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3))
+@retry(
+    retry=retry_if_exception_type(ThrottlingError),
+    wait=wait_exponential(multiplier=2, min=2, max=60),
+    stop=stop_after_attempt(5),
+)
 async def probe_query(
     *,
     session: aiohttp.ClientSession,
@@ -214,6 +364,7 @@ async def probe_query(
     district: str,
     max_days_since_added: int = 30,
     furnish_types: list[FurnishType] = [],
+    config: ScraperConfig | None = None,
 ) -> dict[str, Any]:
     """Probe the API to get result count without fetching full results.
 
@@ -230,10 +381,21 @@ async def probe_query(
         district: District identifier string.
         max_days_since_added: Maximum days since listing was added (BUY only).
         furnish_types: List of furnish types to filter (RENT only).
+        config: Scraper configuration. Loads from environment if not provided.
 
     Returns:
         API response containing totalAvailableResults.
+
+    Raises:
+        CircuitBreakerOpenError: If the circuit breaker is open.
+        ThrottlingError: If the request is throttled.
     """
+    if config is None:
+        config = ScraperConfig.from_env()
+
+    check_circuit_breaker(config)
+    cb = get_circuit_breaker(config)
+
     params: dict[str, str] = {
         "locationIdentifier": districts.get_districts()[district],
         "channel": str(channel).upper(),
@@ -271,11 +433,36 @@ async def probe_query(
         "Connection": "keep-alive",
     }
 
-    async with session.get(
-        "https://api.rightmove.co.uk/api/property-listing",
-        params=params,
-        headers=request_headers,
-    ) as response:
-        if response.status != 200:
-            raise Exception(f"Probe failed: {await response.text()}")
-        return await response.json()
+    start_time = time.time()
+    try:
+        async with session.get(
+            "https://api.rightmove.co.uk/api/property-listing",
+            params=params,
+            headers=request_headers,
+        ) as response:
+            response_time = time.time() - start_time
+            body = await response.json() if response.status == 200 else None
+
+            # Validate response for throttling
+            validate_response(
+                response,
+                response_time,
+                body,
+                config.slow_response_threshold,
+                expect_data=False,  # Probe doesn't need data, just count
+            )
+
+            if response.status != 200:
+                raise Exception(f"Probe failed: {await response.text()}")
+
+            if cb is not None:
+                cb.record_success()
+            return body  # type: ignore
+    except ThrottlingError:
+        if cb is not None:
+            cb.record_failure()
+        raise
+    except Exception as e:
+        if cb is not None:
+            cb.record_failure()
+        raise e
diff --git a/crawler/rec/throttle_detector.py b/crawler/rec/throttle_detector.py
new file mode 100644
index 0000000..dc999ed
--- /dev/null
+++ b/crawler/rec/throttle_detector.py
@@ -0,0 +1,232 @@
+"""Throttling detection and metrics for Rightmove API."""
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass, field
+from typing import Any
+
+import aiohttp
+
+from rec.exceptions import (
+    InvalidResponseError,
+    IPBlockedError,
+    RateLimitError,
+    ServiceUnavailableError,
+    SlowResponseError,
+    UnexpectedEmptyResponseError,
+)
+
+
+@dataclass
+class ThrottleMetrics:
+    """Tracks throttling events and metrics.
+
+    Attributes:
+        rate_limit_count: Number of HTTP 429 errors.
+        service_unavailable_count: Number of HTTP 503 errors.
+        ip_blocked_count: Number of HTTP 403 errors.
+        slow_response_count: Number of slow responses.
+        empty_response_count: Number of unexpected empty responses.
+        invalid_response_count: Number of invalid/error responses.
+        total_requests: Requests counted by record_request() and record_slow_response() only.
+        total_response_time: Cumulative response time in seconds of those counted requests.
+    """
+
+    rate_limit_count: int = 0
+    service_unavailable_count: int = 0
+    ip_blocked_count: int = 0
+    slow_response_count: int = 0
+    empty_response_count: int = 0
+    invalid_response_count: int = 0
+    total_requests: int = 0
+    total_response_time: float = 0.0
+    _start_time: float = field(default_factory=time.time)  # time.time() at instance creation
+
+    def record_rate_limit(self) -> None:
+        """Record a rate limit error (HTTP 429); total_requests is not incremented."""
+        self.rate_limit_count += 1
+
+    def record_service_unavailable(self) -> None:
+        """Record a service unavailable error (HTTP 503); total_requests is not incremented."""
+        self.service_unavailable_count += 1
+
+    def record_ip_blocked(self) -> None:
+        """Record an IP blocked error (HTTP 403); total_requests is not incremented."""
+        self.ip_blocked_count += 1
+
+    def record_slow_response(self, response_time: float) -> None:
+        """Record a slow response and also count it in total_requests/total_response_time.
+
+        Args:
+            response_time: Response time in seconds.
+        """
+        self.slow_response_count += 1
+        self.total_response_time += response_time
+        self.total_requests += 1
+
+    def record_empty_response(self) -> None:
+        """Record an unexpected empty response; total_requests is not incremented."""
+        self.empty_response_count += 1
+
+    def record_invalid_response(self) -> None:
+        """Record an invalid or error response; total_requests is not incremented."""
+        self.invalid_response_count += 1
+
+    def record_request(self, response_time: float) -> None:
+        """Count one request and add its response time to the running total.
+
+        Args:
+            response_time: Response time in seconds.
+        """
+        self.total_requests += 1
+        self.total_response_time += response_time
+
+    @property
+    def average_response_time(self) -> float:
+        """Average response time in seconds over counted requests (0.0 when none were counted)."""
+        if self.total_requests == 0:
+            return 0.0
+        return self.total_response_time / self.total_requests
+
+    @property
+    def total_throttling_events(self) -> int:
+        """Total throttling events: 429 + 503 + 403 + slow responses (empty/invalid excluded)."""
+        return (
+            self.rate_limit_count
+            + self.service_unavailable_count
+            + self.ip_blocked_count
+            + self.slow_response_count
+        )
+
+    @property
+    def throttle_rate(self) -> float:
+        """Throttling events per 100 counted requests; can exceed 100 as 429/503/403 are not counted."""
+        if self.total_requests == 0:
+            return 0.0
+        return (self.total_throttling_events / self.total_requests) * 100
+
+    @property
+    def elapsed_time(self) -> float:
+        """Seconds elapsed since this metrics instance was created."""
+        return time.time() - self._start_time
+
+    def summary(self) -> str:
+        """Generate a multi-line, human-readable summary of all counters and derived rates."""
+        return (
+            f"Throttle Metrics Summary:\n"
+            f"  Total Requests: {self.total_requests}\n"
+            f"  Total Throttling Events: {self.total_throttling_events}\n"
+            f"  Throttle Rate: {self.throttle_rate:.2f}%\n"
+            f"  Rate Limit (429): {self.rate_limit_count}\n"
+            f"  Service Unavailable (503): {self.service_unavailable_count}\n"
+            f"  IP Blocked (403): {self.ip_blocked_count}\n"
+            f"  Slow Responses: {self.slow_response_count}\n"
+            f"  Empty Responses: {self.empty_response_count}\n"
+            f"  Invalid Responses: {self.invalid_response_count}\n"
+            f"  Average Response Time: {self.average_response_time:.2f}s\n"
+            f"  Elapsed Time: {self.elapsed_time:.2f}s"
+        )
+
+
+# Global metrics instance
+_global_metrics: ThrottleMetrics | None = None
+
+
+def get_throttle_metrics() -> ThrottleMetrics:
+    """Get the global throttle metrics instance, creating it on first use.
+
+    Returns:
+        Global ThrottleMetrics instance.
+    """
+    global _global_metrics
+    if _global_metrics is None:
+        _global_metrics = ThrottleMetrics()
+    return _global_metrics
+
+
+def reset_throttle_metrics() -> None:
+    """Replace the global throttle metrics with a fresh instance (all counters back to zero)."""
+    global _global_metrics
+    _global_metrics = ThrottleMetrics()
+
+
+def validate_response(
+    response: aiohttp.ClientResponse,
+    response_time: float,
+    response_body: dict[str, Any] | None,
+    slow_response_threshold: float,
+    expect_data: bool = True,
+) -> None:
+    """Check a response for throttling signals, updating the global metrics as a side effect.
+
+    Args:
+        response: The aiohttp response object.
+        response_time: Time taken for the request in seconds.
+        response_body: Parsed JSON body, or None when unavailable (body checks are then skipped).
+        slow_response_threshold: Threshold in seconds for slow responses.
+        expect_data: Whether zero properties with a positive total counts as an error.
+
+    Raises:
+        RateLimitError: If HTTP 429 is returned.
+        ServiceUnavailableError: If HTTP 503 is returned.
+        IPBlockedError: If HTTP 403 is returned.
+        SlowResponseError: If response time exceeds threshold.
+        UnexpectedEmptyResponseError: If response is empty when data is expected.
+        InvalidResponseError: If response contains error messages.
+    """
+    metrics = get_throttle_metrics()
+
+    # Check HTTP status codes first; they take precedence over the slow-response check
+    if response.status == 429:
+        metrics.record_rate_limit()
+        raise RateLimitError(
+            f"Rate limit exceeded (HTTP 429). "
+            f"Response time: {response_time:.2f}s"
+        )
+
+    if response.status == 503:
+        metrics.record_service_unavailable()
+        raise ServiceUnavailableError(
+            f"Service unavailable (HTTP 503). "
+            f"Response time: {response_time:.2f}s"
+        )
+
+    if response.status == 403:
+        metrics.record_ip_blocked()
+        raise IPBlockedError(
+            f"Access forbidden, possible IP block (HTTP 403). "
+            f"Response time: {response_time:.2f}s"
+        )
+
+    # Check response time
+    if response_time > slow_response_threshold:
+        metrics.record_slow_response(response_time)
+        raise SlowResponseError(
+            f"Slow response detected: {response_time:.2f}s "
+            f"(threshold: {slow_response_threshold}s)"
+        )
+
+    # Check response body if available
+    if response_body is not None:
+        # Check for error messages
+        if "error" in response_body or "GENERIC_ERROR" in str(response_body):
+            metrics.record_invalid_response()
+            raise InvalidResponseError(
+                f"Error in response body: {response_body}"
+            )
+
+        # Check for unexpected empty responses
+        if expect_data:
+            properties = response_body.get("properties") or []
+            total_results = response_body.get("totalAvailableResults") or 0
+
+            # A JSON null in either field is treated like a missing key (empty list / zero)
+            if total_results > 0 and len(properties) == 0:
+                metrics.record_empty_response()
+                raise UnexpectedEmptyResponseError(
+                    f"Expected data but got empty response. "
+                    f"Total available: {total_results}"
+                )
+
+    # No throttling/validation signal found: count the request (other non-200 statuses land here too)
+    metrics.record_request(response_time)
diff --git a/crawler/services/listing_fetcher.py b/crawler/services/listing_fetcher.py
index a94f3e0..2674c9d 100644
--- a/crawler/services/listing_fetcher.py
+++ b/crawler/services/listing_fetcher.py
@@ -6,6 +6,8 @@ from typing import Any
 from config.scraper_config import ScraperConfig
 from listing_processor import ListingProcessor
 from rec.query import create_session, listing_query
+from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
+from rec.throttle_detector import get_throttle_metrics, reset_throttle_metrics
 from models.listing import QueryParameters
 from repositories import ListingRepository
 from tqdm.asyncio import tqdm
@@ -40,76 +42,98 @@ async def dump_listings(
     config = ScraperConfig.from_env()
     splitter = QuerySplitter(config)
 
-    async with create_session(config) as session:
-        # Phase 1 & 2: Split and probe queries
-        logger.info("Splitting query and probing result counts...")
-        subqueries = await splitter.split(parameters, session)
+    # Reset throttle metrics at start
+    reset_throttle_metrics()
 
-        total_estimated = splitter.calculate_total_estimated_results(subqueries)
-        logger.info(
-            f"Split into {len(subqueries)} subqueries, "
-            f"estimated {total_estimated} total results"
-        )
+    try:
+        async with create_session(config) as session:
+            # Phase 1 & 2: Split and probe queries
+            logger.info("Splitting query and probing result counts...")
+            subqueries = await splitter.split(parameters, session)
 
-        # Phase 3: Fetch all pages for each subquery
-        semaphore = asyncio.Semaphore(config.max_concurrent_requests)
-
-        async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
-            """Fetch all pages for a single subquery."""
-            results: list[dict[str, Any]] = []
-
-            estimated = sq.estimated_results or 0
-            if estimated == 0:
-                return results
-
-            page_size = parameters.page_size
-            max_pages = min(
-                config.max_pages_per_query,
-                (estimated // page_size) + 1,
+            total_estimated = splitter.calculate_total_estimated_results(subqueries)
+            logger.info(
+                f"Split into {len(subqueries)} subqueries, "
+                f"estimated {total_estimated} total results"
             )
 
-            for page_id in range(1, max_pages + 1):
-                async with semaphore:
-                    await asyncio.sleep(config.request_delay_ms / 1000)
-                    try:
-                        result = await listing_query(
-                            page=page_id,
-                            channel=parameters.listing_type,
-                            min_bedrooms=sq.min_bedrooms,
-                            max_bedrooms=sq.max_bedrooms,
-                            radius=parameters.radius,
-                            min_price=sq.min_price,
-                            max_price=sq.max_price,
-                            district=sq.district,
-                            page_size=page_size,
-                            max_days_since_added=parameters.max_days_since_added,
-                            furnish_types=parameters.furnish_types or [],
-                            session=session,
-                        )
-                        results.append(result)
+            # Phase 3: Fetch all pages for each subquery
+            semaphore = asyncio.Semaphore(config.max_concurrent_requests)
 
-                        properties = result.get("properties", [])
-                        if len(properties) < page_size:
+            async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
+                """Fetch all pages for a single subquery."""
+                results: list[dict[str, Any]] = []
+
+                estimated = sq.estimated_results or 0
+                if estimated == 0:
+                    return results
+
+                page_size = parameters.page_size
+                max_pages = min(
+                    config.max_pages_per_query,
+                    (estimated // page_size) + 1,
+                )
+
+                for page_id in range(1, max_pages + 1):
+                    async with semaphore:
+                        await asyncio.sleep(config.request_delay_ms / 1000)
+                        try:
+                            result = await listing_query(
+                                page=page_id,
+                                channel=parameters.listing_type,
+                                min_bedrooms=sq.min_bedrooms,
+                                max_bedrooms=sq.max_bedrooms,
+                                radius=parameters.radius,
+                                min_price=sq.min_price,
+                                max_price=sq.max_price,
+                                district=sq.district,
+                                page_size=page_size,
+                                max_days_since_added=parameters.max_days_since_added,
+                                furnish_types=parameters.furnish_types or [],
+                                session=session,
+                                config=config,
+                            )
+                            results.append(result)
+
+                            properties = result.get("properties", [])
+                            if len(properties) < page_size:
+                                break
+
+                        except CircuitBreakerOpenError as e:
+                            logger.error(f"Circuit breaker open: {e}")
                             break
-
-                    except Exception as e:
-                        if "GENERIC_ERROR" in str(e):
-                            logger.debug(
-                                f"Max page for {sq.district}: {page_id - 1}"
+                        except ThrottlingError as e:
+                            logger.warning(
+                                f"Throttling error on page {page_id} for {sq.district}: {e}"
+                            )
+                            break
+                        except Exception as e:
+                            if "GENERIC_ERROR" in str(e):
+                                logger.debug(
+                                    f"Max page for {sq.district}: {page_id - 1}"
+                                )
+                                break
+                            logger.warning(
+                                f"Error fetching page {page_id} for {sq.district}: {e}"
                             )
                             break
-                        logger.warning(
-                            f"Error fetching page {page_id} for {sq.district}: {e}"
-                        )
-                        break
 
-            return results
+                return results
 
-        # Fetch all subqueries with progress bar
-        all_results = await tqdm.gather(
-            *[fetch_subquery(sq) for sq in subqueries],
-            desc="Fetching listings",
-        )
+            # Fetch all subqueries with progress bar
+            all_results = await tqdm.gather(
+                *[fetch_subquery(sq) for sq in subqueries],
+                desc="Fetching listings",
+            )
+    except CircuitBreakerOpenError as e:
+        logger.error(f"Circuit breaker prevented listing fetch: {e}")
+        logger.info(get_throttle_metrics().summary())
+        return []
+    finally:
+        # On every exit path, log the summary if any request was recorded
+        metrics = get_throttle_metrics()
+        if metrics.total_requests > 0:
+            logger.info("\n" + metrics.summary())
 
     # Extract listing identifiers from results
     listing_ids: list[int] = []
diff --git a/crawler/services/query_splitter.py b/crawler/services/query_splitter.py
index 0609634..b183ac2 100644
--- a/crawler/services/query_splitter.py
+++ b/crawler/services/query_splitter.py
@@ -16,6 +16,7 @@ import aiohttp
 from config.scraper_config import ScraperConfig
 from models.listing import ListingType, QueryParameters
 from rec.districts import get_districts
+from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
 
 logger = logging.getLogger("uvicorn.error")
 
@@ -113,6 +114,9 @@ class QuerySplitter:
 
         Returns:
             Total available results for this subquery.
+
+        Raises:
+            CircuitBreakerOpenError: If the circuit breaker is open.
         """
         from rec.query import probe_query
 
@@ -128,8 +132,17 @@ class QuerySplitter:
                 district=subquery.district,
                 max_days_since_added=parameters.max_days_since_added,
                 furnish_types=parameters.furnish_types or [],
+                config=self.config,
             )
             return result.get("totalAvailableResults", 0)
+        except CircuitBreakerOpenError:
+            logger.error("Circuit breaker is open, stopping probe operations")
+            raise
+        except ThrottlingError as e:
+            logger.warning(
+                f"Throttling detected during probe for {subquery.district}: {e}"
+            )
+            return 0
         except Exception as e:
             logger.warning(f"Failed to probe subquery {subquery}: {e}")
             return 0
diff --git a/crawler/tests/integration/test_throttle_integration.py b/crawler/tests/integration/test_throttle_integration.py
new file mode 100644
index 0000000..39be15b
--- /dev/null
+++ b/crawler/tests/integration/test_throttle_integration.py
@@ -0,0 +1,311 @@
+"""Integration tests for throttle detection and circuit breaker."""
+import asyncio
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+from aiohttp import ClientResponse
+
+from config.scraper_config import ScraperConfig
+from rec.exceptions import (
+    CircuitBreakerOpenError,
+    RateLimitError,
+    ServiceUnavailableError,
+    ThrottlingError,
+)
+from rec.query import (
+    detail_query,
+    listing_query,
+    probe_query,
+    get_circuit_breaker,
+    reset_circuit_breaker,
+)
+from rec.throttle_detector import reset_throttle_metrics, get_throttle_metrics
+from rec.circuit_breaker import CircuitBreaker, CircuitState
+from models.listing import ListingType
+
+
+@pytest.fixture
+def config() -> ScraperConfig:
+    """Create a test configuration."""
+    return ScraperConfig(
+        max_concurrent_requests=5,
+        request_delay_ms=10,
+        slow_response_threshold=2.0,
+        enable_circuit_breaker=True,
+        circuit_breaker_failure_threshold=3,
+        circuit_breaker_recovery_timeout=0.5,
+    )
+
+
+@pytest.fixture(autouse=True)
+def reset_globals() -> None:
+    """Reset global state before each test."""
+    reset_throttle_metrics()
+    reset_circuit_breaker()
+
+
+class MockResponse:
+    """Mock aiohttp response."""
+
+    def __init__(
+        self,
+        status: int = 200,
+        json_data: dict | None = None,
+        text: str = "",
+    ):
+        self.status = status
+        self._json_data = json_data or {}
+        self._text = text
+
+    async def json(self) -> dict:
+        return self._json_data
+
+    async def text(self) -> str:
+        return self._text
+
+    async def __aenter__(self) -> "MockResponse":
+        return self
+
+    async def __aexit__(self, *args: object) -> None:
+        pass
+
+
+class TestThrottlingRetryBehavior:
+    """Test retry behavior for throttling errors."""
+
+    @pytest.mark.asyncio
+    async def test_rate_limit_triggers_retry(self, config: ScraperConfig) -> None:
+        """Test that 429 responses trigger retry with backoff."""
+        call_count = 0
+
+        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
+            nonlocal call_count
+            call_count += 1
+            if call_count < 3:
+                return MockResponse(status=429)
+            return MockResponse(
+                status=200,
+                json_data={"totalAvailableResults": 10, "properties": []},
+            )
+
+        mock_session = MagicMock()
+        mock_session.get = mock_get
+
+        # Mock district lookup
+        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
+            # The retry decorator will catch RateLimitError and retry
+            # We need to patch the tenacity wait to speed up the test
+            with patch("tenacity.wait_exponential.__call__", return_value=0):
+                result = await probe_query(
+                    session=mock_session,
+                    channel=ListingType.RENT,
+                    min_bedrooms=1,
+                    max_bedrooms=2,
+                    radius=1.0,
+                    min_price=1000,
+                    max_price=2000,
+                    district="Test",
+                    config=config,
+                )
+
+        assert result["totalAvailableResults"] == 10
+        assert call_count == 3
+
+    @pytest.mark.asyncio
+    async def test_service_unavailable_triggers_retry(
+        self, config: ScraperConfig
+    ) -> None:
+        """Test that 503 responses trigger retry."""
+        call_count = 0
+
+        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
+            nonlocal call_count
+            call_count += 1
+            if call_count < 2:
+                return MockResponse(status=503)
+            return MockResponse(
+                status=200,
+                json_data={"totalAvailableResults": 5, "properties": []},
+            )
+
+        mock_session = MagicMock()
+        mock_session.get = mock_get
+
+        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
+            with patch("tenacity.wait_exponential.__call__", return_value=0):
+                result = await probe_query(
+                    session=mock_session,
+                    channel=ListingType.RENT,
+                    min_bedrooms=1,
+                    max_bedrooms=2,
+                    radius=1.0,
+                    min_price=1000,
+                    max_price=2000,
+                    district="Test",
+                    config=config,
+                )
+
+        assert call_count == 2
+
+
+class TestCircuitBreakerIntegration:
+    """Test circuit breaker integration with queries."""
+
+    @pytest.mark.asyncio
+    async def test_circuit_breaker_opens_after_failures(
+        self, config: ScraperConfig
+    ) -> None:
+        """Test that circuit breaker opens after consecutive failures."""
+        call_count = 0
+
+        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
+            nonlocal call_count
+            call_count += 1
+            return MockResponse(status=429)
+
+        mock_session = MagicMock()
+        mock_session.get = mock_get
+
+        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
+            # After enough failures, circuit should open
+            with pytest.raises((RateLimitError, CircuitBreakerOpenError)):
+                with patch("tenacity.wait_exponential.__call__", return_value=0):
+                    await probe_query(
+                        session=mock_session,
+                        channel=ListingType.RENT,
+                        min_bedrooms=1,
+                        max_bedrooms=2,
+                        radius=1.0,
+                        min_price=1000,
+                        max_price=2000,
+                        district="Test",
+                        config=config,
+                    )
+
+        # Check circuit breaker state
+        cb = get_circuit_breaker(config)
+        assert cb is not None
+        # Recorded failures must have reached the circuit-opening threshold
+        assert cb.failure_count >= config.circuit_breaker_failure_threshold
+
+    @pytest.mark.asyncio
+    async def test_circuit_breaker_blocks_requests_when_open(
+        self, config: ScraperConfig
+    ) -> None:
+        """Test that open circuit breaker blocks requests immediately."""
+        # Force open the circuit breaker
+        cb = get_circuit_breaker(config)
+        assert cb is not None
+        for _ in range(config.circuit_breaker_failure_threshold):
+            cb.record_failure()
+
+        assert cb.is_open
+
+        mock_session = MagicMock()
+
+        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
+            with pytest.raises(CircuitBreakerOpenError):
+                await probe_query(
+                    session=mock_session,
+                    channel=ListingType.RENT,
+                    min_bedrooms=1,
+                    max_bedrooms=2,
+                    radius=1.0,
+                    min_price=1000,
+                    max_price=2000,
+                    district="Test",
+                    config=config,
+                )
+
+
+class TestMetricsTracking:
+    """Test throttle metrics are properly tracked."""
+
+    @pytest.mark.asyncio
+    async def test_metrics_tracked_on_rate_limit(self, config: ScraperConfig) -> None:
+        """Test that rate limit errors are tracked in metrics."""
+        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
+            return MockResponse(status=429)
+
+        mock_session = MagicMock()
+        mock_session.get = mock_get
+
+        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
+            with pytest.raises(RateLimitError):
+                with patch("tenacity.wait_exponential.__call__", return_value=0):
+                    await probe_query(
+                        session=mock_session,
+                        channel=ListingType.RENT,
+                        min_bedrooms=1,
+                        max_bedrooms=2,
+                        radius=1.0,
+                        min_price=1000,
+                        max_price=2000,
+                        district="Test",
+                        config=config,
+                    )
+
+        metrics = get_throttle_metrics()
+        assert metrics.rate_limit_count > 0
+
+    @pytest.mark.asyncio
+    async def test_metrics_tracked_on_success(self, config: ScraperConfig) -> None:
+        """Test that successful requests are tracked in metrics."""
+        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
+            return MockResponse(
+                status=200,
+                json_data={"totalAvailableResults": 10, "properties": []},
+            )
+
+        mock_session = MagicMock()
+        mock_session.get = mock_get
+
+        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
+            await probe_query(
+                session=mock_session,
+                channel=ListingType.RENT,
+                min_bedrooms=1,
+                max_bedrooms=2,
+                radius=1.0,
+                min_price=1000,
+                max_price=2000,
+                district="Test",
+                config=config,
+            )
+
+        metrics = get_throttle_metrics()
+        assert metrics.total_requests == 1
+        assert metrics.total_throttling_events == 0
+
+
+class TestConfigIntegration:
+    """Test configuration integration."""
+
+    def test_config_from_env_includes_throttle_settings(self) -> None:
+        """Test that config loads throttle settings from environment."""
+        import os
+
+        original_env = os.environ.copy()
+        try:
+            os.environ["RIGHTMOVE_SLOW_RESPONSE_THRESHOLD"] = "5.0"
+            os.environ["RIGHTMOVE_ENABLE_CIRCUIT_BREAKER"] = "false"
+            os.environ["RIGHTMOVE_CIRCUIT_BREAKER_FAILURES"] = "10"
+            os.environ["RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT"] = "120.0"
+
+            config = ScraperConfig.from_env()
+
+            assert config.slow_response_threshold == 5.0
+            assert config.enable_circuit_breaker is False
+            assert config.circuit_breaker_failure_threshold == 10
+            assert config.circuit_breaker_recovery_timeout == 120.0
+        finally:
+            os.environ.clear()
+            os.environ.update(original_env)
+
+    def test_circuit_breaker_disabled_returns_none(self) -> None:
+        """Test that disabled circuit breaker returns None."""
+        config = ScraperConfig(
+            enable_circuit_breaker=False,
+        )
+        reset_circuit_breaker()
+        cb = get_circuit_breaker(config)
+        assert cb is None
diff --git a/crawler/tests/unit/test_throttle_detection.py b/crawler/tests/unit/test_throttle_detection.py
new file mode 100644
index 0000000..2e786e9
--- /dev/null
+++ b/crawler/tests/unit/test_throttle_detection.py
@@ -0,0 +1,334 @@
+"""Unit tests for throttle detection and circuit breaker."""
+import pytest
+from unittest.mock import MagicMock, AsyncMock
+import time
+
+from rec.exceptions import (
+    RightmoveAPIError,
+    ThrottlingError,
+    RateLimitError,
+    ServiceUnavailableError,
+    IPBlockedError,
+    SlowResponseError,
+    UnexpectedEmptyResponseError,
+    InvalidResponseError,
+    CircuitBreakerOpenError,
+)
+from rec.throttle_detector import (
+    ThrottleMetrics,
+    validate_response,
+    get_throttle_metrics,
+    reset_throttle_metrics,
+)
+from rec.circuit_breaker import CircuitBreaker, CircuitState
+
+
+class TestExceptionHierarchy:
+    """Test custom exception hierarchy."""
+
+    def test_rightmove_api_error_is_exception(self) -> None:
+        assert issubclass(RightmoveAPIError, Exception)
+
+    def test_throttling_error_is_rightmove_api_error(self) -> None:
+        assert issubclass(ThrottlingError, RightmoveAPIError)
+
+    def test_rate_limit_error_is_throttling_error(self) -> None:
+        assert issubclass(RateLimitError, ThrottlingError)
+
+    def test_service_unavailable_error_is_throttling_error(self) -> None:
+        assert issubclass(ServiceUnavailableError, ThrottlingError)
+
+    def test_ip_blocked_error_is_throttling_error(self) -> None:
+        assert issubclass(IPBlockedError, ThrottlingError)
+
+    def test_slow_response_error_is_throttling_error(self) -> None:
+        assert issubclass(SlowResponseError, ThrottlingError)
+
+    def test_unexpected_empty_response_error_is_rightmove_api_error(self) -> None:
+        assert issubclass(UnexpectedEmptyResponseError, RightmoveAPIError)
+        assert not issubclass(UnexpectedEmptyResponseError, ThrottlingError)
+
+    def test_invalid_response_error_is_rightmove_api_error(self) -> None:
+        assert issubclass(InvalidResponseError, RightmoveAPIError)
+        assert not issubclass(InvalidResponseError, ThrottlingError)
+
+    def test_circuit_breaker_open_error_is_rightmove_api_error(self) -> None:
+        assert issubclass(CircuitBreakerOpenError, RightmoveAPIError)
+
+    def test_exception_messages(self) -> None:
+        error = RateLimitError("Too many requests")
+        assert str(error) == "Too many requests"
+
+
+class TestThrottleMetrics:
+    """Test ThrottleMetrics class."""
+
+    def test_initial_state(self) -> None:
+        metrics = ThrottleMetrics()
+        assert metrics.rate_limit_count == 0
+        assert metrics.service_unavailable_count == 0
+        assert metrics.ip_blocked_count == 0
+        assert metrics.slow_response_count == 0
+        assert metrics.empty_response_count == 0
+        assert metrics.invalid_response_count == 0
+        assert metrics.total_requests == 0
+        assert metrics.total_response_time == 0.0
+
+    def test_record_rate_limit(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_rate_limit()
+        assert metrics.rate_limit_count == 1
+        metrics.record_rate_limit()
+        assert metrics.rate_limit_count == 2
+
+    def test_record_service_unavailable(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_service_unavailable()
+        assert metrics.service_unavailable_count == 1
+
+    def test_record_ip_blocked(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_ip_blocked()
+        assert metrics.ip_blocked_count == 1
+
+    def test_record_slow_response(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_slow_response(15.0)
+        assert metrics.slow_response_count == 1
+        assert metrics.total_response_time == 15.0
+        assert metrics.total_requests == 1
+
+    def test_record_empty_response(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_empty_response()
+        assert metrics.empty_response_count == 1
+
+    def test_record_invalid_response(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_invalid_response()
+        assert metrics.invalid_response_count == 1
+
+    def test_record_request(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_request(0.5)
+        assert metrics.total_requests == 1
+        assert metrics.total_response_time == 0.5
+
+    def test_average_response_time(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_request(1.0)
+        metrics.record_request(2.0)
+        metrics.record_request(3.0)
+        assert metrics.average_response_time == 2.0
+
+    def test_average_response_time_zero_requests(self) -> None:
+        metrics = ThrottleMetrics()
+        assert metrics.average_response_time == 0.0
+
+    def test_total_throttling_events(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_rate_limit()
+        metrics.record_service_unavailable()
+        metrics.record_ip_blocked()
+        metrics.record_slow_response(15.0)
+        assert metrics.total_throttling_events == 4
+
+    def test_throttle_rate(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_request(0.5)  # 1 normal request
+        metrics.record_request(0.5)  # 2 normal requests
+        metrics.record_rate_limit()
+        metrics.record_request(0.5)  # 3 normal requests (rate limit doesn't count as request)
+        # 1 throttling event, 3 requests = 33.33%
+        assert metrics.throttle_rate == pytest.approx(33.33, rel=0.01)
+
+    def test_throttle_rate_zero_requests(self) -> None:
+        metrics = ThrottleMetrics()
+        assert metrics.throttle_rate == 0.0
+
+    def test_elapsed_time(self) -> None:
+        metrics = ThrottleMetrics()
+        time.sleep(0.1)
+        assert metrics.elapsed_time >= 0.1
+
+    def test_summary(self) -> None:
+        metrics = ThrottleMetrics()
+        metrics.record_request(1.0)
+        metrics.record_rate_limit()
+        summary = metrics.summary()
+        assert "Total Requests:" in summary
+        assert "Rate Limit (429):" in summary
+        assert "1" in summary
+
+
+class TestGlobalMetrics:
+    """Test global metrics accessor."""
+
+    def test_get_throttle_metrics_singleton(self) -> None:
+        reset_throttle_metrics()
+        m1 = get_throttle_metrics()
+        m2 = get_throttle_metrics()
+        assert m1 is m2
+
+    def test_reset_throttle_metrics(self) -> None:
+        reset_throttle_metrics()
+        metrics = get_throttle_metrics()
+        metrics.record_rate_limit()
+        assert metrics.rate_limit_count == 1
+        reset_throttle_metrics()
+        new_metrics = get_throttle_metrics()
+        assert new_metrics.rate_limit_count == 0
+
+
+class TestValidateResponse:
+    """Test validate_response function."""
+
+    def setup_method(self) -> None:
+        reset_throttle_metrics()
+
+    def create_mock_response(self, status: int) -> MagicMock:
+        response = MagicMock()
+        response.status = status
+        return response
+
+    def test_rate_limit_error(self) -> None:
+        response = self.create_mock_response(429)
+        with pytest.raises(RateLimitError):
+            validate_response(response, 0.5, None, 10.0)
+        assert get_throttle_metrics().rate_limit_count == 1
+
+    def test_service_unavailable_error(self) -> None:
+        response = self.create_mock_response(503)
+        with pytest.raises(ServiceUnavailableError):
+            validate_response(response, 0.5, None, 10.0)
+        assert get_throttle_metrics().service_unavailable_count == 1
+
+    def test_ip_blocked_error(self) -> None:
+        response = self.create_mock_response(403)
+        with pytest.raises(IPBlockedError):
+            validate_response(response, 0.5, None, 10.0)
+        assert get_throttle_metrics().ip_blocked_count == 1
+
+    def test_slow_response_error(self) -> None:
+        response = self.create_mock_response(200)
+        body = {"totalAvailableResults": 0, "properties": []}
+        with pytest.raises(SlowResponseError):
+            validate_response(response, 15.0, body, 10.0)
+        assert get_throttle_metrics().slow_response_count == 1
+
+    def test_slow_response_just_under_threshold(self) -> None:
+        response = self.create_mock_response(200)
+        body = {"totalAvailableResults": 0, "properties": []}
+        # Should not raise
+        validate_response(response, 9.9, body, 10.0)
+        assert get_throttle_metrics().slow_response_count == 0
+
+    def test_error_in_response_body(self) -> None:
+        response = self.create_mock_response(200)
+        body = {"error": "Something went wrong"}
+        with pytest.raises(InvalidResponseError):
+            validate_response(response, 0.5, body, 10.0)
+        assert get_throttle_metrics().invalid_response_count == 1
+
+    def test_generic_error_in_body(self) -> None:
+        response = self.create_mock_response(200)
+        body = {"message": "GENERIC_ERROR occurred"}
+        with pytest.raises(InvalidResponseError):
+            validate_response(response, 0.5, body, 10.0)
+
+    def test_unexpected_empty_response(self) -> None:
+        response = self.create_mock_response(200)
+        body = {"totalAvailableResults": 100, "properties": []}
+        with pytest.raises(UnexpectedEmptyResponseError):
+            validate_response(response, 0.5, body, 10.0, expect_data=True)
+        assert get_throttle_metrics().empty_response_count == 1
+
+    def test_empty_response_when_not_expecting_data(self) -> None:
+        response = self.create_mock_response(200)
+        body = {"totalAvailableResults": 100, "properties": []}
+        # Should not raise when expect_data=False
+        validate_response(response, 0.5, body, 10.0, expect_data=False)
+        assert get_throttle_metrics().empty_response_count == 0
+
+    def test_valid_response(self) -> None:
+        response = self.create_mock_response(200)
+        body = {
+            "totalAvailableResults": 10,
+            "properties": [{"id": 1}, {"id": 2}],
+        }
+        validate_response(response, 0.5, body, 10.0, expect_data=True)
+        assert get_throttle_metrics().total_requests == 1
+        assert get_throttle_metrics().total_throttling_events == 0
+
+
+class TestCircuitBreaker:
+    """Test CircuitBreaker class."""
+
+    def test_initial_state_is_closed(self) -> None:
+        cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
+        assert cb.state == CircuitState.CLOSED
+        assert cb.is_closed
+        assert not cb.is_open
+        assert not cb.is_half_open
+
+    def test_allows_requests_when_closed(self) -> None:
+        cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
+        # Should not raise
+        cb.call()
+
+    def test_opens_after_threshold_failures(self) -> None:
+        cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
+        cb.record_failure()
+        cb.record_failure()
+        assert cb.is_closed
+        cb.record_failure()
+        assert cb.is_open
+
+    def test_blocks_requests_when_open(self) -> None:
+        cb = CircuitBreaker(failure_threshold=1, recovery_timeout=60.0)
+        cb.record_failure()
+        assert cb.is_open
+        with pytest.raises(CircuitBreakerOpenError):
+            cb.call()
+
+    def test_success_resets_failure_count(self) -> None:
+        cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
+        cb.record_failure()
+        cb.record_failure()
+        assert cb.failure_count == 2
+        cb.record_success()
+        assert cb.failure_count == 0
+
+    def test_transitions_to_half_open_after_timeout(self) -> None:
+        cb = CircuitBreaker(failure_threshold=1, recovery_timeout=0.1)
+        cb.record_failure()
+        assert cb.is_open
+        time.sleep(0.15)
+        cb.call()  # Should transition to half-open
+        assert cb.is_half_open
+
+    def test_half_open_success_closes_circuit(self) -> None:
+        cb = CircuitBreaker(failure_threshold=1, recovery_timeout=0.1)
+        cb.record_failure()
+        time.sleep(0.15)
+        cb.call()  # Transition to half-open
+        assert cb.is_half_open
+        cb.record_success()
+        assert cb.is_closed
+
+    def test_half_open_failure_reopens_circuit(self) -> None:
+        cb = CircuitBreaker(failure_threshold=1, recovery_timeout=0.1)
+        cb.record_failure()
+        time.sleep(0.15)
+        cb.call()  # Transition to half-open
+        assert cb.is_half_open
+        cb.record_failure()
+        assert cb.is_open
+
+    def test_reset(self) -> None:
+        cb = CircuitBreaker(failure_threshold=1, recovery_timeout=60.0)
+        cb.record_failure()
+        assert cb.is_open
+        cb.reset()
+        assert cb.is_closed
+        assert cb.failure_count == 0

From c4b11ccfe955c90b238060af1b4e4883f37c789a Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Mon, 2 Feb 2026 23:01:13 +0000
Subject: [PATCH 4/5] Add comprehensive logging to Celery tasks and listing
 processor

---
 crawler/listing_processor.py   |  60 +++++--
 crawler/tasks/listing_tasks.py | 316 +++++++++++++++++++++++----------
 2 files changed, 269 insertions(+), 107 deletions(-)

diff --git a/crawler/listing_processor.py b/crawler/listing_processor.py
index 020016f..e1151ca 100644
--- a/crawler/listing_processor.py
+++ b/crawler/listing_processor.py
@@ -13,6 +13,9 @@ from repositories.listing_repository import ListingRepository
 
 logger = logging.getLogger("uvicorn.error")
 
+# Also use celery task logger for visibility in worker output
+celery_logger = logging.getLogger("celery.task")
+
 
 class ListingProcessor:
     semaphore: asyncio.Semaphore
@@ -36,15 +39,16 @@ class ListingProcessor:
         for step in self.process_steps:
             if await step.needs_processing(listing_id):
                 async with self.semaphore:
+                    step_name = step.__class__.__name__
                     try:
                         listing = await step.process(listing_id)
+                        logger.debug(f"[{listing_id}] {step_name} completed")
                     except Exception as e:
-                        logger.error(f"Failed to process {listing_id=}: {e}")
+                        logger.error(f"[{listing_id}] {step_name} failed: {e}")
+                        celery_logger.error(f"[{listing_id}] {step_name} failed: {e}")
                         return None
         return listing
 
-    async def listing_exists(self, listing_id: int) -> bool: ...
-
 
 class Step:
     listing_repository: ListingRepository
@@ -65,19 +69,23 @@ class FetchListingDetailsStep(Step):
         existing_listings = await self.listing_repository.get_listings(
             only_ids=[listing_id]
         )
-        if (existing_listings) == 0:
+        if len(existing_listings) == 0:
             return True
         return False
 
     async def process(self, listing_id: int) -> Listing:
-        logger.debug(f"Fetching details for {listing_id=}")
+        logger.debug(f"[{listing_id}] Fetching property details from API")
+        celery_logger.info(f"[{listing_id}] Fetching details...")
+
         existing_listings = await self.listing_repository.get_listings(
             only_ids=[listing_id]
         )
         now = datetime.now()
         if len(existing_listings) > 0:
             # listing exists, do not refresh
+            logger.debug(f"[{listing_id}] Already exists, skipping refresh")
             return existing_listings[0]
+
         listing_details = await detail_query(listing_id)
 
         furnish_type_str = listing_details["property"].get("letFurnishType", "unknown")
@@ -124,7 +132,12 @@ class FetchListingDetailsStep(Step):
             additional_info=listing_details,
         )
         await self.listing_repository.upsert_listings([listing])
-        logger.debug(f"Completed fetching details for {listing_id=}")
+
+        celery_logger.info(
+            f"[{listing_id}] Details fetched: £{listing.price}, "
+            f"{listing.number_of_bedrooms}BR, {listing.agency}"
+        )
+        logger.debug(f"[{listing_id}] Details fetch complete")
         # TODO: dump to filesystem
         return listing
 
@@ -140,7 +153,8 @@ class FetchImagesStep(Step):
         return len(listing.floorplan_image_paths) == 0
 
     async def process(self, listing_id: int) -> Listing:
-        logger.debug(f"Fetching images for {listing_id=}")
+        logger.debug(f"[{listing_id}] Fetching floorplan images")
+
         existing_listings = await self.listing_repository.get_listings(
             only_ids=[listing_id]
         )
@@ -152,6 +166,12 @@ class FetchImagesStep(Step):
         all_floorplans = listing.additional_info.get("property", {}).get(
             "floorplans", []
         )
+
+        if len(all_floorplans) == 0:
+            logger.debug(f"[{listing_id}] No floorplans available")
+            return listing
+
+        downloaded = 0
         client_timeout = aiohttp.ClientTimeout(total=30)
         for floorplan_obj in all_floorplans:
             url = floorplan_obj["url"]
@@ -169,8 +189,12 @@ class FetchImagesStep(Step):
                     with open(floorplan_path, "wb") as f:
                         f.write(await response.read())
                     listing.floorplan_image_paths.append(str(floorplan_path))
+                    downloaded += 1
+
         await self.listing_repository.upsert_listings([listing])
-        logger.debug(f"Completed fetching images for {listing_id=}")
+
+        celery_logger.info(f"[{listing_id}] Downloaded {downloaded} floorplan images")
+        logger.debug(f"[{listing_id}] Image fetch complete")
         return listing
 
 
@@ -188,11 +212,19 @@ class DetectFloorplanStep(Step):
         return listings[0].square_meters is None
 
     async def process(self, listing_id: int) -> Listing:
-        logger.debug(f"Running floorplan detection for {listing_id=}")
+        logger.debug(f"[{listing_id}] Running OCR on floorplans")
+
         listings = await self.listing_repository.get_listings(only_ids=[listing_id])
         if len(listings) == 0:
             raise ValueError(f"Listing {listing_id} does not exist")
         listing = listings[0]
+
+        if len(listing.floorplan_image_paths) == 0:
+            logger.debug(f"[{listing_id}] No floorplan images to process")
+            listing.square_meters = 0
+            await self.listing_repository.upsert_listings([listing])
+            return listing
+
         sqms = []
         for floorplan_path in listing.floorplan_image_paths:
             async with self.ocr_semaphore:
@@ -201,9 +233,15 @@ class DetectFloorplanStep(Step):
                 )
             if estimated_sqm is not None:
                 sqms.append(estimated_sqm)
+
         max_sqm = max(sqms, default=0)  # try once, if we fail, keep as 0
-        # if max_sqm is not None:
         listing.square_meters = max_sqm
         await self.listing_repository.upsert_listings([listing])
-        logger.debug(f"Completed running floorplan detection for {listing_id=}")
+
+        if max_sqm > 0:
+            celery_logger.info(f"[{listing_id}] OCR detected {max_sqm} sqm")
+        else:
+            logger.debug(f"[{listing_id}] OCR: no square meters detected")
+
+        logger.debug(f"[{listing_id}] OCR complete")
         return listing
diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py
index 1fb3041..713a56d 100644
--- a/crawler/tasks/listing_tasks.py
+++ b/crawler/tasks/listing_tasks.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import time
 from typing import Any
 from celery import Task
 from celery.schedules import crontab
@@ -9,6 +10,8 @@ from config.scraper_config import ScraperConfig
 from listing_processor import ListingProcessor
 from models.listing import Listing, QueryParameters
 from rec.query import create_session, listing_query
+from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
+from rec.throttle_detector import get_throttle_metrics, reset_throttle_metrics
 from repositories.listing_repository import ListingRepository
 from database import engine
 from services.query_splitter import QuerySplitter, SubQuery
@@ -16,6 +19,16 @@ from utils.redis_lock import redis_lock
 
 logger = logging.getLogger("uvicorn.error")
 
+# Also set up a celery-specific logger; if it has no handlers yet, attach a StreamHandler (stderr by default)
+celery_logger = logging.getLogger("celery.task")
+if not celery_logger.handlers:
+    handler = logging.StreamHandler()
+    handler.setFormatter(logging.Formatter(
+        "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+    ))
+    celery_logger.addHandler(handler)
+    celery_logger.setLevel(logging.INFO)
+
 SCRAPE_LOCK_NAME = "scrape_listings"
 
 
@@ -23,12 +36,18 @@ SCRAPE_LOCK_NAME = "scrape_listings"
 def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
     with redis_lock(SCRAPE_LOCK_NAME) as acquired:
         if not acquired:
-            logger.warning("Another scrape job is already running, skipping this execution")
+            msg = "Another scrape job is already running, skipping this execution"
+            logger.warning(msg)
+            celery_logger.warning(msg)
             self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"})
             return {"status": "skipped", "reason": "another_job_running"}
 
+        celery_logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
         logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
+
         parsed_parameters = QueryParameters.model_validate_json(parameters_json)
+        celery_logger.info(f"Starting scrape with parameters: {parsed_parameters}")
+
         self.update_state(state="Starting...", meta={"progress": 0})
         asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
         return {"progress": 0}
@@ -50,46 +69,91 @@ async def dump_listings_full(
     *, task: Task, parameters: QueryParameters
 ) -> list[Listing]:
     """Fetches all listings, images as well as detects floorplans"""
+    start_time = time.time()
+    celery_logger.info("=" * 60)
+    celery_logger.info("PHASE 1: Initializing listing fetch")
+    celery_logger.info("=" * 60)
+
     repository = ListingRepository(engine)
 
     task.update_state(state="Identifying missing listings", meta={"progress": 0})
+    celery_logger.info("Querying Rightmove API to identify new listings...")
     ids_to_process = await get_ids_to_process(
         parameters=parameters, repository=repository, task=task
     )
+
+    celery_logger.info(f"Found {len(ids_to_process)} new listings to process")
     logger.info(f"Found {len(ids_to_process)} listings to process")
 
     if len(ids_to_process) == 0:
+        elapsed = time.time() - start_time
+        celery_logger.info(f"No new listings found. Completed in {elapsed:.1f}s")
         task.update_state(
             state="No new listings found",
             meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"},
         )
         return []
 
+    celery_logger.info("=" * 60)
+    celery_logger.info("PHASE 2: Processing listings (fetch details, images, OCR)")
+    celery_logger.info("=" * 60)
+
     listing_processor = ListingProcessor(repository)
+    celery_logger.info(f"Starting processing {len(ids_to_process)} listings")
     logger.info(f"Starting processing {len(ids_to_process)} listings")
-    return await dump_listings_and_monitor(
+
+    result = await dump_listings_and_monitor(
         task=task, listing_processor=listing_processor, missing_ids=ids_to_process
     )
 
+    elapsed = time.time() - start_time
+    celery_logger.info("=" * 60)
+    celery_logger.info(f"COMPLETED: Processed {len(result)} listings in {elapsed:.1f}s")
+    celery_logger.info("=" * 60)
+
+    return result
+
 
 async def dump_listings_and_monitor(
     *, task: Task, listing_processor: ListingProcessor, missing_ids: set[int]
 ) -> list[Listing]:
     task_progress = {missing_id: 0 for missing_id in missing_ids}
+    processed_count = 0
+    failed_count = 0
+    start_time = time.time()
 
     async def process(missing_id: int) -> Listing | None:
+        nonlocal processed_count, failed_count
         listing = await listing_processor.process_listing(missing_id)
         task_progress[missing_id] = 1
+        if listing is not None:
+            processed_count += 1
+        else:
+            failed_count += 1
         return listing
 
     async def monitor() -> None:
+        last_progress = 0
         while (progress := sum(task_progress.values())) < len(missing_ids):
             progress_ratio = round(progress / len(missing_ids), 2)
-            logger.error(
-                f"Task progress: {progress_ratio * 100}% ({progress} out of {len(missing_ids)})"
-            )
+
+            # Log when the ratio is >= 0.1 above the last logged ratio, or while exactly one listing is done
+            if progress_ratio >= last_progress + 0.1 or progress == 1:
+                elapsed = time.time() - start_time
+                rate = progress / elapsed if elapsed > 0 else 0
+                eta = (len(missing_ids) - progress) / rate if rate > 0 else 0
+
+                celery_logger.info(
+                    f"Progress: {progress_ratio * 100:.0f}% "
+                    f"({progress}/{len(missing_ids)}) "
+                    f"| Elapsed: {elapsed:.0f}s "
+                    f"| Rate: {rate:.1f}/s "
+                    f"| ETA: {eta:.0f}s"
+                )
+                last_progress = progress_ratio
+
             task.update_state(
-                state=f"Progress: {progress_ratio * 100}% ({progress} out of {len(missing_ids)})",
+                state=f"Processing: {progress_ratio * 100:.0f}% ({progress}/{len(missing_ids)})",
                 meta={"progress": progress_ratio, "processed": progress, "total": len(missing_ids)},
             )
             await asyncio.sleep(1)
@@ -97,7 +161,11 @@ async def dump_listings_and_monitor(
     processed_listings = await asyncio.gather(
         *[process(id) for id in missing_ids], *[monitor()]
     )
-    filtered_listings = [l for l in processed_listings if l is not None]
+    filtered_listings = [listing for listing in processed_listings if listing is not None]
+
+    celery_logger.info(
+        f"Processing complete: {processed_count} successful, {failed_count} failed"
+    )
 
     return filtered_listings
 
@@ -149,115 +217,171 @@ async def get_ids_to_process(
     config = ScraperConfig.from_env()
     splitter = QuerySplitter(config)
 
+    # Reset throttle metrics
+    reset_throttle_metrics()
+
     def on_progress(phase: str, message: str) -> None:
         task.update_state(state=message, meta={"phase": phase})
+        celery_logger.info(f"[{phase}] {message}")
 
-    async with create_session(config) as session:
-        # Phase 1 & 2: Split and probe queries
-        task.update_state(
-            state="Analyzing query and splitting by price bands...",
-            meta={"phase": "splitting", "progress": 0},
-        )
-        subqueries = await splitter.split(parameters, session, on_progress)
+    celery_logger.info("Starting query splitting and probing...")
 
-        total_estimated = splitter.calculate_total_estimated_results(subqueries)
-        logger.info(
-            f"Split into {len(subqueries)} subqueries, "
-            f"estimated {total_estimated} total results"
-        )
+    try:
+        async with create_session(config) as session:
+            # Phase 1 & 2: Split and probe queries
+            task.update_state(
+                state="Analyzing query and splitting by price bands...",
+                meta={"phase": "splitting", "progress": 0},
+            )
+            subqueries = await splitter.split(parameters, session, on_progress)
 
-        # Phase 3: Fetch all pages for each subquery
-        task.update_state(
-            state=f"Fetching listings from {len(subqueries)} subqueries...",
-            meta={
-                "phase": "fetching",
-                "subqueries": len(subqueries),
-                "estimated_results": total_estimated,
-            },
-        )
-
-        semaphore = asyncio.Semaphore(config.max_concurrent_requests)
-        identifiers: set[int] = set()
-
-        async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
-            """Fetch all pages for a single subquery."""
-            results: list[dict[str, Any]] = []
-
-            # Calculate how many pages we need based on estimated results
-            estimated = sq.estimated_results or 0
-            if estimated == 0:
-                return results
-
-            # Fetch pages up to max_pages_per_query or until no more results
-            page_size = parameters.page_size
-            max_pages = min(
-                config.max_pages_per_query,
-                (estimated // page_size) + 1,
+            total_estimated = splitter.calculate_total_estimated_results(subqueries)
+            celery_logger.info(
+                f"Query split complete: {len(subqueries)} subqueries, "
+                f"~{total_estimated} estimated total results"
+            )
+            logger.info(
+                f"Split into {len(subqueries)} subqueries, "
+                f"estimated {total_estimated} total results"
             )
 
-            for page_id in range(1, max_pages + 1):
-                async with semaphore:
-                    await asyncio.sleep(config.request_delay_ms / 1000)
-                    try:
-                        result = await listing_query(
-                            page=page_id,
-                            channel=parameters.listing_type,
-                            min_bedrooms=sq.min_bedrooms,
-                            max_bedrooms=sq.max_bedrooms,
-                            radius=parameters.radius,
-                            min_price=sq.min_price,
-                            max_price=sq.max_price,
-                            district=sq.district,
-                            page_size=page_size,
-                            max_days_since_added=parameters.max_days_since_added,
-                            furnish_types=parameters.furnish_types or [],
-                            session=session,
-                        )
-                        results.append(result)
+            # Phase 3: Fetch all pages for each subquery
+            task.update_state(
+                state=f"Fetching listings from {len(subqueries)} subqueries...",
+                meta={
+                    "phase": "fetching",
+                    "subqueries": len(subqueries),
+                    "estimated_results": total_estimated,
+                },
+            )
 
-                        # Check if we've received all results
-                        properties = result.get("properties", [])
-                        if len(properties) < page_size:
-                            # No more results on next page
+            celery_logger.info(f"Fetching pages from {len(subqueries)} subqueries...")
+
+            semaphore = asyncio.Semaphore(config.max_concurrent_requests)
+            identifiers: set[int] = set()
+            completed_subqueries = 0
+            total_pages_fetched = 0
+
+            async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
+                """Fetch all pages for a single subquery."""
+                nonlocal completed_subqueries, total_pages_fetched
+                results: list[dict[str, Any]] = []
+
+                # Calculate how many pages we need based on estimated results
+                estimated = sq.estimated_results or 0
+                if estimated == 0:
+                    completed_subqueries += 1
+                    return results
+
+                # Fetch pages up to max_pages_per_query or until no more results
+                page_size = parameters.page_size
+                max_pages = min(
+                    config.max_pages_per_query,
+                    (estimated // page_size) + 1,
+                )
+
+                for page_id in range(1, max_pages + 1):
+                    async with semaphore:
+                        await asyncio.sleep(config.request_delay_ms / 1000)
+                        try:
+                            result = await listing_query(
+                                page=page_id,
+                                channel=parameters.listing_type,
+                                min_bedrooms=sq.min_bedrooms,
+                                max_bedrooms=sq.max_bedrooms,
+                                radius=parameters.radius,
+                                min_price=sq.min_price,
+                                max_price=sq.max_price,
+                                district=sq.district,
+                                page_size=page_size,
+                                max_days_since_added=parameters.max_days_since_added,
+                                furnish_types=parameters.furnish_types or [],
+                                session=session,
+                                config=config,
+                            )
+                            results.append(result)
+                            total_pages_fetched += 1
+
+                            # Check if we've received all results
+                            properties = result.get("properties", [])
+                            if len(properties) < page_size:
+                                # No more results on next page
+                                break
+
+                        except CircuitBreakerOpenError as e:
+                            celery_logger.error(f"Circuit breaker open: {e}")
                             break
-
-                    except Exception as e:
-                        if "GENERIC_ERROR" in str(e):
-                            # Reached end of results
-                            logger.debug(
-                                f"Max page for {sq.district}: {page_id - 1}"
+                        except ThrottlingError as e:
+                            celery_logger.warning(
+                                f"Throttling on {sq.district} page {page_id}: {e}"
+                            )
+                            break
+                        except Exception as e:
+                            if "GENERIC_ERROR" in str(e):
+                                # Reached end of results
+                                logger.debug(
+                                    f"Max page for {sq.district}: {page_id - 1}"
+                                )
+                                break
+                            logger.warning(
+                                f"Error fetching page {page_id} for {sq.district}: {e}"
                             )
                             break
-                        logger.warning(
-                            f"Error fetching page {page_id} for {sq.district}: {e}"
-                        )
-                        break
 
-            return results
+                completed_subqueries += 1
+                return results
 
-        # Fetch all subqueries concurrently
-        all_results = await asyncio.gather(
-            *[fetch_subquery(sq) for sq in subqueries]
-        )
+            # Fetch all subqueries concurrently
+            all_results = await asyncio.gather(
+                *[fetch_subquery(sq) for sq in subqueries]
+            )
 
-        # Extract identifiers from all results
-        for subquery_results in all_results:
-            for response_json in subquery_results:
-                if not response_json:
-                    continue
-                if response_json.get("totalAvailableResults", 0) == 0:
-                    continue
-                for property_data in response_json.get("properties", []):
-                    identifier = property_data.get("identifier")
-                    if identifier:
-                        identifiers.add(identifier)
+            celery_logger.info(
+                f"Fetch complete: {total_pages_fetched} pages from "
+                f"{completed_subqueries} subqueries"
+            )
 
+            # Extract identifiers from all results
+            for subquery_results in all_results:
+                for response_json in subquery_results:
+                    if not response_json:
+                        continue
+                    if response_json.get("totalAvailableResults", 0) == 0:
+                        continue
+                    for property_data in response_json.get("properties", []):
+                        identifier = property_data.get("identifier")
+                        if identifier:
+                            identifiers.add(identifier)
+
+    except CircuitBreakerOpenError as e:
+        celery_logger.error(f"Circuit breaker prevented query: {e}")
+        # Log throttle metrics
+        metrics = get_throttle_metrics()
+        if metrics.total_requests > 0:
+            celery_logger.info(metrics.summary())
+        return set()
+    finally:
+        # Log throttle metrics
+        metrics = get_throttle_metrics()
+        if metrics.total_requests > 0:
+            celery_logger.info(f"API Stats: {metrics.total_requests} requests, "
+                               f"avg {metrics.average_response_time:.2f}s, "
+                               f"{metrics.total_throttling_events} throttled")
+
+    celery_logger.info(f"Found {len(identifiers)} unique listing IDs from API")
     logger.info(f"Found {len(identifiers)} unique listings")
 
     # Filter out listings already in the database
-    all_listing_ids = {l.id for l in await repository.get_listings()}
+    celery_logger.info("Checking database for existing listings...")
+    all_listing_ids = {listing.id for listing in await repository.get_listings()}
     new_ids = identifiers - all_listing_ids
 
+    celery_logger.info(
+        f"Filtering: {len(identifiers)} total, "
+        f"{len(all_listing_ids)} existing in DB, "
+        f"{len(new_ids)} new to process"
+    )
+
     task.update_state(
         state=f"Found {len(new_ids)} new listings to process",
         meta={

From 5514fa638135f51e36f47b2e702a3b4cf669af5c Mon Sep 17 00:00:00 2001
From: Viktor Barzin <viktorbarzin@meta.com>
Date: Fri, 6 Feb 2026 20:34:50 +0000
Subject: [PATCH 5/5] Remove 1000-result limit, add Redis caching and virtual
 scrolling

- Remove hard-coded limit=1000 default from listing_geojson and streaming
  endpoints, allowing all matching results to be returned
- Add Redis caching service (db=2, 30min TTL) that caches query results
  as Redis Lists for fast re-queries with reduced DB load
- Integrate cache into streaming endpoint: serve from cache on hit,
  populate cache on miss during DB streaming
- Invalidate cache after scrape completes (both success and no-new-listings)
- Replace ScrollArea with react-virtuoso in ListView for virtual scrolling,
  keeping only ~20-30 DOM nodes regardless of list size
- Handle metadata streaming message to show "0 / N" progress from start
- Throttle frontend state updates with requestAnimationFrame to prevent
  UI jank from rapid re-renders during cached response streaming
---
 crawler/api/app.py                            | 100 ++++---
 crawler/frontend/package-lock.json            | 252 +++++++++++++++---
 crawler/frontend/package.json                 |   6 +-
 crawler/frontend/src/App.tsx                  |  24 +-
 crawler/frontend/src/components/ListView.tsx  | 151 +++++++++++
 .../frontend/src/services/streamingService.ts | 137 ++++++++++
 crawler/services/listing_cache.py             |  99 +++++++
 crawler/tasks/listing_tasks.py                |   4 +
 8 files changed, 695 insertions(+), 78 deletions(-)
 create mode 100644 crawler/frontend/src/components/ListView.tsx
 create mode 100644 crawler/frontend/src/services/streamingService.ts
 create mode 100644 crawler/services/listing_cache.py

diff --git a/crawler/api/app.py b/crawler/api/app.py
index 96f8446..9dd3988 100644
--- a/crawler/api/app.py
+++ b/crawler/api/app.py
@@ -18,6 +18,11 @@ from fastapi.middleware.cors import CORSMiddleware
 from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson
 
 from services import listing_service, export_service, district_service, task_service
+from services.listing_cache import (
+    get_cached_count,
+    get_cached_features,
+    cache_features_batch,
+)
 from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
 from api.metrics import metrics_app
 from opentelemetry.metrics import get_meter
@@ -101,7 +106,7 @@ async def get_listing(
 async def get_listing_geojson(
     user: Annotated[User, Depends(get_current_user)],
     query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
-    limit: int = 1000,  # Default limit to prevent timeout
+    limit: int | None = None,
 ) -> dict:
     """Get listings as GeoJSON for map display."""
     repository = ListingRepository(engine)
@@ -118,7 +123,7 @@ async def stream_listing_geojson(
     user: Annotated[User, Depends(get_current_user)],
     query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
     batch_size: int = 50,
-    limit: int = 1000,
+    limit: int | None = None,
 ) -> StreamingResponse:
     """Stream listings as NDJSON for progressive map loading.
 
@@ -128,38 +133,67 @@ async def stream_listing_geojson(
     - complete: Final message with total count
     """
     async def generate():
-        repository = ListingRepository(engine)
+        # Check cache first
+        cached_count = get_cached_count(query_parameters)
 
-        # Phase 1: Fast count for progress estimation
-        total = repository.count_listings(query_parameters)
-        effective_total = min(limit, total) if limit else total
+        if cached_count is not None and cached_count > 0:
+            # Cache HIT
+            effective_total = min(limit, cached_count) if limit else cached_count
 
-        yield json.dumps({
-            "type": "metadata",
-            "batch_size": batch_size,
-            "total_expected": effective_total,
-        }) + "\n"
+            yield json.dumps({
+                "type": "metadata",
+                "batch_size": batch_size,
+                "total_expected": effective_total,
+                "cached": True,
+            }) + "\n"
 
-        # Phase 2: Stream with column projection and keyset pagination
-        count = 0
-        batch = []
-        for row in repository.stream_listings_optimized(
-            query_parameters, limit=limit, page_size=batch_size
-        ):
-            feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
-            batch.append(feature)
-            count += 1
+            count = 0
+            for feature_batch in get_cached_features(query_parameters, batch_size=batch_size):
+                if limit and count + len(feature_batch) > limit:
+                    feature_batch = feature_batch[:limit - count]
+                count += len(feature_batch)
+                yield json.dumps({"type": "batch", "features": feature_batch}) + "\n"
+                if limit and count >= limit:
+                    break
 
-            if len(batch) >= batch_size:
+            yield json.dumps({"type": "complete", "total": count}) + "\n"
+        else:
+            # Cache MISS - query DB and populate cache
+            repository = ListingRepository(engine)
+
+            # Phase 1: Fast count for progress estimation
+            total = repository.count_listings(query_parameters)
+            effective_total = min(limit, total) if limit else total
+
+            yield json.dumps({
+                "type": "metadata",
+                "batch_size": batch_size,
+                "total_expected": effective_total,
+                "cached": False,
+            }) + "\n"
+
+            # Phase 2: Stream with column projection and keyset pagination
+            count = 0
+            batch = []
+            for row in repository.stream_listings_optimized(
+                query_parameters, limit=limit, page_size=batch_size
+            ):
+                feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
+                batch.append(feature)
+                count += 1
+
+                if len(batch) >= batch_size:
+                    cache_features_batch(query_parameters, batch)
+                    yield json.dumps({"type": "batch", "features": batch}) + "\n"
+                    batch = []
+
+            # Send remaining
+            if batch:
+                cache_features_batch(query_parameters, batch)
                 yield json.dumps({"type": "batch", "features": batch}) + "\n"
-                batch = []
 
-        # Send remaining
-        if batch:
-            yield json.dumps({"type": "batch", "features": batch}) + "\n"
-
-        # Final message
-        yield json.dumps({"type": "complete", "total": count}) + "\n"
+            # Final message
+            yield json.dumps({"type": "complete", "total": count}) + "\n"
 
     return StreamingResponse(
         generate(),
@@ -200,13 +234,19 @@ async def refresh_listings(
 async def get_task_status(
     user: Annotated[User, Depends(get_current_user)],
     task_id: str,
-) -> dict[str, str]:
+) -> dict[str, str | int | float | None]:
     """Get the status of a background task."""
     status = task_service.get_task_status(task_id)
     return {
         "task_id": status.task_id,
         "status": status.status,
-        "result": json.dumps(status.result) if status.result else "",
+        "result": json.dumps(status.result) if status.result else None,
+        "progress": status.progress,
+        "processed": status.processed,
+        "total": status.total,
+        "message": status.message,
+        "error": status.error,
+        "traceback": status.traceback,
     }
 
 
diff --git a/crawler/frontend/package-lock.json b/crawler/frontend/package-lock.json
index c3cbbde..07c0944 100644
--- a/crawler/frontend/package-lock.json
+++ b/crawler/frontend/package-lock.json
@@ -9,7 +9,9 @@
       "version": "0.0.0",
       "dependencies": {
         "@hookform/resolvers": "^5.1.1",
+        "@radix-ui/react-accordion": "^1.2.12",
         "@radix-ui/react-alert-dialog": "^1.1.14",
+        "@radix-ui/react-checkbox": "^1.3.3",
         "@radix-ui/react-dialog": "^1.1.14",
         "@radix-ui/react-hover-card": "^1.1.14",
         "@radix-ui/react-label": "^2.1.7",
@@ -18,6 +20,7 @@
         "@radix-ui/react-scroll-area": "^1.2.9",
         "@radix-ui/react-select": "^2.2.5",
         "@radix-ui/react-separator": "^1.1.7",
+        "@radix-ui/react-slider": "^1.3.6",
         "@radix-ui/react-slot": "^1.2.3",
         "@radix-ui/react-tooltip": "^1.2.7",
         "@tabler/icons-react": "^3.34.0",
@@ -40,7 +43,7 @@
         "react-dom": "^19.1.0",
         "react-hook-form": "^7.58.1",
         "react-oidc-context": "^3.3.0",
-        "rivets": "^0.9.6",
+        "react-virtuoso": "^4.18.1",
         "tailwind-merge": "^3.3.1",
         "tailwindcss": "^4.1.10",
         "zod": "^3.25.67"
@@ -50,7 +53,6 @@
         "@types/node": "^24.0.1",
         "@types/react": "^19.1.2",
         "@types/react-dom": "^19.1.2",
-        "@types/rivets": "^0.9.5",
         "@vitejs/plugin-react-swc": "^3.9.0",
         "eslint": "^9.25.0",
         "eslint-plugin-react-hooks": "^5.2.0",
@@ -924,6 +926,43 @@
       "integrity": "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA==",
       "license": "MIT"
     },
+    "node_modules/@radix-ui/react-accordion": {
+      "version": "1.2.12",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz",
+      "integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collapsible": "1.1.12",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/primitive": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
+      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
+      "license": "MIT"
+    },
     "node_modules/@radix-ui/react-alert-dialog": {
       "version": "1.1.14",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.14.tgz",
@@ -975,6 +1014,126 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-checkbox": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-checkbox/-/react-checkbox-1.3.3.tgz",
+      "integrity": "sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/primitive": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
+      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
+      "license": "MIT"
+    },
+    "node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/react-presence": {
+      "version": "1.1.5",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
+      "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collapsible": {
+      "version": "1.1.12",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz",
+      "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/primitive": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
+      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
+      "license": "MIT"
+    },
+    "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-presence": {
+      "version": "1.1.5",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
+      "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-collection": {
       "version": "1.1.7",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
@@ -1482,6 +1641,45 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-slider": {
+      "version": "1.3.6",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slider/-/react-slider-1.3.6.tgz",
+      "integrity": "sha512-JPYb1GuM1bxfjMRlNLE+BcmBC8onfCi60Blk7OBqi2MLTFdS+8401U4uFjnwkOr49BLmXxLC6JHkvAsx5OJvHw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/number": "1.1.1",
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-slider/node_modules/@radix-ui/primitive": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
+      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
+      "license": "MIT"
+    },
     "node_modules/@radix-ui/react-slot": {
       "version": "1.2.3",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
@@ -2773,16 +2971,6 @@
         "@types/geojson": "*"
       }
     },
-    "node_modules/@types/jquery": {
-      "version": "3.5.32",
-      "resolved": "https://registry.npmjs.org/@types/jquery/-/jquery-3.5.32.tgz",
-      "integrity": "sha512-b9Xbf4CkMqS02YH8zACqN1xzdxc3cO735Qe5AbSUFmyOiaWAbcpqh9Wna+Uk0vgACvoQHpWDg2rGdHkYPLmCiQ==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@types/sizzle": "*"
-      }
-    },
     "node_modules/@types/json-schema": {
       "version": "7.0.15",
       "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
@@ -2852,23 +3040,6 @@
         "@types/react": "^19.0.0"
       }
     },
-    "node_modules/@types/rivets": {
-      "version": "0.9.5",
-      "resolved": "https://registry.npmjs.org/@types/rivets/-/rivets-0.9.5.tgz",
-      "integrity": "sha512-spCtZoSOrS8kNTJNOXamCCQurqOdF1Piak8bUQVqHQNRoTLoID6O6xVX41P5W2vvlxc9UpSG75zl4CRra0l3Eg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@types/jquery": "*"
-      }
-    },
-    "node_modules/@types/sizzle": {
-      "version": "2.3.9",
-      "resolved": "https://registry.npmjs.org/@types/sizzle/-/sizzle-2.3.9.tgz",
-      "integrity": "sha512-xzLEyKB50yqCUPUJkIsrVvoWNfFUbIZI+RspLWt8u+tIW/BetMBZtgV2LY/2o+tYH8dRvQ+eoPf3NdhQCcLE2w==",
-      "dev": true,
-      "license": "MIT"
-    },
     "node_modules/@types/supercluster": {
       "version": "7.1.3",
       "resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
@@ -5348,6 +5519,16 @@
         }
       }
     },
+    "node_modules/react-virtuoso": {
+      "version": "4.18.1",
+      "resolved": "https://registry.npmjs.org/react-virtuoso/-/react-virtuoso-4.18.1.tgz",
+      "integrity": "sha512-KF474cDwaSb9+SJ380xruBB4P+yGWcVkcu26HtMqYNMTYlYbrNy8vqMkE+GpAApPPufJqgOLMoWMFG/3pJMXUA==",
+      "license": "MIT",
+      "peerDependencies": {
+        "react": ">=16 || >=17 || >= 18 || >= 19",
+        "react-dom": ">=16 || >=17 || >= 18 || >=19"
+      }
+    },
     "node_modules/resolve-from": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
@@ -5378,14 +5559,6 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/rivets": {
-      "version": "0.9.6",
-      "resolved": "https://registry.npmjs.org/rivets/-/rivets-0.9.6.tgz",
-      "integrity": "sha512-KfdMjLRWw4+38ej9bRXegKZVfYo0jEacwadA5z6NTKya+YohwGemwdbxvJ52WCXODkTnR4Q8UmUC6HVxsdzkxA==",
-      "dependencies": {
-        "sightglass": "~0.2.4"
-      }
-    },
     "node_modules/robust-predicates": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz",
@@ -5524,11 +5697,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/sightglass": {
-      "version": "0.2.6",
-      "resolved": "https://registry.npmjs.org/sightglass/-/sightglass-0.2.6.tgz",
-      "integrity": "sha512-t1fgbuhURcWc8VgZk8kJQ3QmmZk3kghDcf0wpsN8I8RaV05IUkc2b195KpGqgocKT/q8+vKk6EcB2c7N2lAd6A=="
-    },
     "node_modules/source-map-js": {
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
diff --git a/crawler/frontend/package.json b/crawler/frontend/package.json
index 466d325..a9c66a3 100644
--- a/crawler/frontend/package.json
+++ b/crawler/frontend/package.json
@@ -11,7 +11,9 @@
   },
   "dependencies": {
     "@hookform/resolvers": "^5.1.1",
+    "@radix-ui/react-accordion": "^1.2.12",
     "@radix-ui/react-alert-dialog": "^1.1.14",
+    "@radix-ui/react-checkbox": "^1.3.3",
     "@radix-ui/react-dialog": "^1.1.14",
     "@radix-ui/react-hover-card": "^1.1.14",
     "@radix-ui/react-label": "^2.1.7",
@@ -20,6 +22,7 @@
     "@radix-ui/react-scroll-area": "^1.2.9",
     "@radix-ui/react-select": "^2.2.5",
     "@radix-ui/react-separator": "^1.1.7",
+    "@radix-ui/react-slider": "^1.3.6",
     "@radix-ui/react-slot": "^1.2.3",
     "@radix-ui/react-tooltip": "^1.2.7",
     "@tabler/icons-react": "^3.34.0",
@@ -42,7 +45,7 @@
     "react-dom": "^19.1.0",
     "react-hook-form": "^7.58.1",
     "react-oidc-context": "^3.3.0",
-    "rivets": "^0.9.6",
+    "react-virtuoso": "^4.18.1",
     "tailwind-merge": "^3.3.1",
     "tailwindcss": "^4.1.10",
     "zod": "^3.25.67"
@@ -52,7 +55,6 @@
     "@types/node": "^24.0.1",
     "@types/react": "^19.1.2",
     "@types/react-dom": "^19.1.2",
-    "@types/rivets": "^0.9.5",
     "@vitejs/plugin-react-swc": "^3.9.0",
     "eslint": "^9.25.0",
     "eslint-plugin-react-hooks": "^5.2.0",
diff --git a/crawler/frontend/src/App.tsx b/crawler/frontend/src/App.tsx
index 5e90bdf..8933d56 100644
--- a/crawler/frontend/src/App.tsx
+++ b/crawler/frontend/src/App.tsx
@@ -67,16 +67,32 @@ function App() {
     setStreamingProgress({ count: 0 });
     setListingData(null);
 
+    let updateScheduled = false;
+
+    const flushUpdate = () => {
+      updateScheduled = false;
+      setListingData({
+        type: 'FeatureCollection',
+        features: [...accumulatedFeaturesRef.current]
+      });
+    };
+
+    const scheduleUpdate = () => {
+      if (!updateScheduled) {
+        updateScheduled = true;
+        requestAnimationFrame(flushUpdate);
+      }
+    };
+
     try {
       for await (const batch of streamListingGeoJSON(user, parameters, (progress) => {
         setStreamingProgress(progress);
       })) {
         accumulatedFeaturesRef.current.push(...batch);
-        setListingData({
-          type: 'FeatureCollection',
-          features: [...accumulatedFeaturesRef.current]
-        });
+        scheduleUpdate();
       }
+      // Final flush to ensure all data is rendered
+      flushUpdate();
     } catch (error) {
       if (error instanceof Error) {
         setSubmitError(error.message);
diff --git a/crawler/frontend/src/components/ListView.tsx b/crawler/frontend/src/components/ListView.tsx
new file mode 100644
index 0000000..790146a
--- /dev/null
+++ b/crawler/frontend/src/components/ListView.tsx
@@ -0,0 +1,143 @@
+import { useState, useMemo, useCallback } from 'react';
+import { ArrowUpDown, ArrowUp, ArrowDown } from 'lucide-react';
+import { Virtuoso } from 'react-virtuoso';
+import { Button } from './ui/button';
+import { PropertyCard } from './PropertyCard';
+import type { GeoJSONFeatureCollection, PropertyFeature, PropertyProperties } from '@/types';
+
+type SortField = 'total_price' | 'qmprice' | 'qm' | 'rooms' | 'last_seen';
+type SortOrder = 'asc' | 'desc';
+
+interface ListViewProps {
+    listingData: GeoJSONFeatureCollection;
+    onPropertyClick?: (property: PropertyProperties, coordinates: [number, number]) => void;
+    highlightedPropertyUrl?: string | null;
+}
+
+interface SortConfig {
+    field: SortField;
+    order: SortOrder;
+}
+
+const SORT_OPTIONS: { field: SortField; label: string }[] = [
+    { field: 'total_price', label: 'Price' },
+    { field: 'qmprice', label: '£/m²' },
+    { field: 'qm', label: 'Size' },
+    { field: 'rooms', label: 'Beds' },
+    { field: 'last_seen', label: 'Last Seen' },
+];
+
+/**
+ * Numeric sort key of a feature for the given field.
+ * Missing or non-numeric values and unparseable dates map to 0, so the comparator never sees NaN.
+ */
+function sortValue(feature: PropertyFeature, field: SortField): number {
+    const props = feature.properties;
+    switch (field) {
+        case 'total_price':
+            return Number(props.total_price) || 0;
+        case 'qmprice':
+            return Number(props.qmprice) || 0;
+        case 'qm':
+            return Number(props.qm) || 0;
+        case 'rooms':
+            return Number(props.rooms) || 0;
+        case 'last_seen':
+            return new Date(props.last_seen).getTime() || 0;
+        default:
+            return 0;
+    }
+}
+
+/**
+ * Arrow icon for a sort button: neutral when `field` is not the active sort field, otherwise up/down.
+ * Defined at module level so React keeps one stable component type across ListView renders.
+ */
+function SortIcon({ field, sortConfig }: { field: SortField; sortConfig: SortConfig }) {
+    if (sortConfig.field !== field) {
+        return <ArrowUpDown className="h-3.5 w-3.5" />;
+    }
+    return sortConfig.order === 'asc'
+        ? <ArrowUp className="h-3.5 w-3.5" />
+        : <ArrowDown className="h-3.5 w-3.5" />;
+}
+
+/** Sortable, virtualized list of property cards for a GeoJSON feature collection. */
+export function ListView({ listingData, onPropertyClick, highlightedPropertyUrl }: ListViewProps) {
+    const [sortConfig, setSortConfig] = useState<SortConfig>({ field: 'qmprice', order: 'asc' });
+
+    // Average price per sqm over listings with a positive numeric value ("good deal" indicator)
+    const avgPricePerSqm = useMemo(() => {
+        const validPrices = listingData.features
+            .map((f) => f.properties.qmprice)
+            .filter((p): p is number => typeof p === 'number' && p > 0);
+        return validPrices.length > 0
+            ? validPrices.reduce((a, b) => a + b, 0) / validPrices.length
+            : 0;
+    }, [listingData.features]);
+
+    // Sorted copy of the features; the incoming array is never mutated
+    const sortedFeatures = useMemo(() => {
+        const direction = sortConfig.order === 'asc' ? 1 : -1;
+        return [...listingData.features].sort(
+            (a, b) => direction * (sortValue(a, sortConfig.field) - sortValue(b, sortConfig.field))
+        );
+    }, [listingData.features, sortConfig]);
+
+    // Clicking the active ascending field flips it to descending; anything else sorts ascending
+    const handleSort = (field: SortField) => {
+        setSortConfig((prev) => ({
+            field,
+            order: prev.field === field && prev.order === 'asc' ? 'desc' : 'asc',
+        }));
+    };
+
+    const handlePropertyClick = useCallback((feature: PropertyFeature) => {
+        onPropertyClick?.(feature.properties, feature.geometry.coordinates);
+    }, [onPropertyClick]);
+
+    return (
+        <div className="h-full flex flex-col bg-background">
+            {/* Sort controls */}
+            <div className="flex items-center gap-1 p-2 border-b overflow-x-auto">
+                <span className="text-xs text-muted-foreground mr-1 shrink-0">Sort:</span>
+                {SORT_OPTIONS.map((option) => (
+                    <Button
+                        key={option.field}
+                        variant={sortConfig.field === option.field ? 'secondary' : 'ghost'}
+                        size="sm"
+                        className="h-7 px-2 text-xs shrink-0"
+                        onClick={() => handleSort(option.field)}
+                    >
+                        {option.label}
+                        <SortIcon field={option.field} sortConfig={sortConfig} />
+                    </Button>
+                ))}
+            </div>
+
+            {/* Listing count */}
+            <div className="px-3 py-2 text-sm text-muted-foreground border-b">
+                Showing {sortedFeatures.length.toLocaleString()} properties
+            </div>
+
+            {/* Property list */}
+            <Virtuoso
+                className="flex-1"
+                data={sortedFeatures}
+                overscan={200}
+                itemContent={(_index, feature) => (
+                    <div className="px-3 pb-2 first:pt-3">
+                        <PropertyCard
+                            key={feature.properties.url}
+                            property={feature.properties}
+                            variant="compact"
+                            avgPricePerSqm={avgPricePerSqm}
+                            isHighlighted={feature.properties.url === highlightedPropertyUrl}
+                            onClick={() => handlePropertyClick(feature)}
+                        />
+                    </div>
+                )}
+            />
+        </div>
+    );
+}
diff --git a/crawler/frontend/src/services/streamingService.ts b/crawler/frontend/src/services/streamingService.ts
new file mode 100644
index 0000000..1c1d69f
--- /dev/null
+++ b/crawler/frontend/src/services/streamingService.ts
@@ -0,0 +1,167 @@
+// Streaming service for progressive listing data loading
+
+import type { User } from 'oidc-client-ts';
+import type { PropertyFeature } from '@/types';
+import type { ParameterValues } from '@/components/FilterPanel';
+import { ApiError } from '@/types';
+import { API_ENDPOINTS } from '@/constants';
+
+type QueryParams = Record<string, string | number | boolean | Date | undefined>;
+
+/**
+ * Build a URL query string from a parameters object.
+ *
+ * Entries whose value is undefined, null or the empty string are omitted.
+ * Date values are serialized with toISOString(), everything else with String().
+ */
+function buildQueryString(params: QueryParams): string {
+  const queryString = new URLSearchParams();
+
+  for (const [key, value] of Object.entries(params)) {
+    if (value === undefined || value === null || value === '') continue;
+    queryString.append(key, value instanceof Date ? value.toISOString() : String(value));
+  }
+
+  return queryString.toString();
+}
+
+/**
+ * Map the filter form values onto the query parameter names sent to the streaming endpoint.
+ */
+function buildListingParams(parameters: ParameterValues): QueryParams {
+  return {
+    listing_type: parameters.listing_type,
+    min_bedrooms: parameters.min_bedrooms,
+    max_bedrooms: parameters.max_bedrooms,
+    max_price: parameters.max_price,
+    min_price: parameters.min_price,
+    min_sqm: parameters.min_sqm,
+    max_sqm: parameters.max_sqm,
+    min_price_per_sqm: parameters.min_price_per_sqm,
+    max_price_per_sqm: parameters.max_price_per_sqm,
+    last_seen_days: parameters.last_seen_days,
+    let_date_available_from: parameters.available_from,
+    district_names: parameters.district || undefined,
+    furnish_types: parameters.furnish_types?.join(',') || undefined,
+  };
+}
+
+/** One newline-delimited JSON message emitted by the streaming endpoint. */
+export interface StreamMessage {
+  type: 'metadata' | 'batch' | 'complete';
+  features?: PropertyFeature[];
+  total?: number;
+  total_expected?: number;
+  batch_size?: number;
+  cached?: boolean;
+}
+
+/** Progress snapshot passed to the onProgress callback. */
+export interface StreamingProgress {
+  count: number;
+  total?: number;
+}
+
+/**
+ * Stream listing GeoJSON data as an async generator.
+ * Yields batches of features as they arrive from the server.
+ *
+ * The response body is cancelled if the consumer stops iterating early or an error occurs,
+ * so an abandoned stream does not keep downloading.
+ *
+ * @throws ApiError when the response status is not OK.
+ * @throws Error when the response has no body.
+ */
+export async function* streamListingGeoJSON(
+  user: User,
+  parameters: ParameterValues,
+  onProgress?: (progress: StreamingProgress) => void
+): AsyncGenerator<PropertyFeature[], void, unknown> {
+  const queryString = buildQueryString(buildListingParams(parameters));
+  const url = queryString
+    ? `${API_ENDPOINTS.LISTING_GEOJSON_STREAM}?${queryString}`
+    : API_ENDPOINTS.LISTING_GEOJSON_STREAM;
+
+  const response = await fetch(url, {
+    headers: {
+      Authorization: `Bearer ${user.access_token}`,
+    },
+  });
+
+  if (!response.ok) {
+    throw new ApiError(`Error: ${response.status}`, response.status);
+  }
+
+  if (!response.body) {
+    throw new Error('No response body');
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+  let totalCount = 0;
+  let expectedTotal: number | undefined;
+  let streamEnded = false;
+
+  // Parse one NDJSON line, report progress, and return the features to yield (if any).
+  // Kept separate from the `yield` so a consumer error is never mistaken for a parse error.
+  const handleLine = (line: string): PropertyFeature[] | undefined => {
+    if (!line.trim()) return undefined;
+
+    let message: StreamMessage;
+    try {
+      message = JSON.parse(line);
+    } catch (e) {
+      // Best effort: skip the malformed line and keep streaming.
+      console.error('Failed to parse streaming message:', e);
+      return undefined;
+    }
+
+    switch (message.type) {
+      case 'metadata':
+        expectedTotal = message.total_expected;
+        onProgress?.({ count: 0, total: expectedTotal });
+        return undefined;
+      case 'batch':
+        if (!message.features) return undefined;
+        totalCount += message.features.length;
+        onProgress?.({ count: totalCount, total: expectedTotal });
+        return message.features;
+      case 'complete':
+        onProgress?.({ count: message.total ?? totalCount, total: message.total });
+        return undefined;
+      default:
+        return undefined;
+    }
+  };
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) {
+        streamEnded = true;
+        break;
+      }
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split('\n');
+      buffer = lines.pop() ?? ''; // Keep the incomplete trailing line in the buffer
+
+      for (const line of lines) {
+        const features = handleLine(line);
+        if (features) yield features;
+      }
+    }
+
+    // Flush bytes still held by the decoder, then handle a final line that had no trailing newline.
+    buffer += decoder.decode();
+    const trailing = handleLine(buffer);
+    if (trailing) yield trailing;
+  } finally {
+    if (!streamEnded) {
+      // The consumer stopped early or an error occurred: abort the download.
+      await reader.cancel().catch(() => undefined);
+    }
+    reader.releaseLock();
+  }
+}
diff --git a/crawler/services/listing_cache.py b/crawler/services/listing_cache.py
new file mode 100644
index 0000000..c77adfc
--- /dev/null
+++ b/crawler/services/listing_cache.py
@@ -0,0 +1,124 @@
+"""Redis-based caching for listing GeoJSON query results.
+
+Each cached query is a Redis list of JSON-encoded features, appended batch by
+batch while a result set is being streamed.
+
+NOTE(review): because entries are appended incrementally, a reader can observe
+a partially written list (concurrent request, or a stream that was abandoned
+midway) and treat it as complete. Fixing that needs an explicit "finalize" step
+from the caller, which this module's current interface does not have.
+"""
+import hashlib
+import json
+import logging
+import os
+from functools import lru_cache
+from typing import Generator
+from urllib.parse import urlsplit, urlunsplit
+
+import redis
+
+from models.listing import QueryParameters
+
+logger = logging.getLogger("uvicorn.error")
+
+CACHE_PREFIX = "listings:geojson:"
+CACHE_TTL_SECONDS = 30 * 60  # 30 minutes
+CACHE_DB = 2
+
+
+def _cache_url(broker_url: str) -> str:
+    """Return ``broker_url`` with its database path replaced by ``CACHE_DB``.
+
+    The URL is parsed rather than split on "/", so a broker URL without a
+    database path (``redis://host:6379``) or with a query string still yields
+    a valid URL.
+    """
+    parts = urlsplit(broker_url)
+    return urlunsplit(parts._replace(path=f"/{CACHE_DB}"))
+
+
+@lru_cache(maxsize=1)
+def _get_redis_client() -> redis.Redis:
+    """Get the shared Redis client: the Celery broker server, database ``CACHE_DB``.
+
+    The client (and its connection pool) is created once and reused, instead
+    of opening a new pool for every cache call.
+    """
+    broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
+    return redis.from_url(_cache_url(broker_url), decode_responses=True)
+
+
+def make_cache_key(query_params: QueryParameters) -> str:
+    """Generate a cache key from query parameters."""
+    params_json = query_params.model_dump_json()
+    hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16]
+    return f"{CACHE_PREFIX}{hash_suffix}"
+
+
+def get_cached_count(query_params: QueryParameters) -> int | None:
+    """Return the number of cached features for a query, or None if not cached.
+
+    Redis removes a list once it is empty, so a length of 0 means the key does not exist.
+    """
+    try:
+        count = _get_redis_client().llen(make_cache_key(query_params))
+        return count or None
+    except redis.RedisError as e:
+        logger.warning("Redis cache read error: %s", e)
+        return None
+
+
+def get_cached_features(
+    query_params: QueryParameters, batch_size: int = 50
+) -> Generator[list[dict], None, None]:
+    """Yield batches of cached GeoJSON features.
+
+    A Redis error ends the iteration early (after logging); batches already yielded stay valid.
+    """
+    try:
+        client = _get_redis_client()
+        key = make_cache_key(query_params)
+        total = client.llen(key)
+
+        for start in range(0, total, batch_size):
+            # LRANGE bounds are inclusive
+            items = client.lrange(key, start, start + batch_size - 1)
+            if items:
+                yield [json.loads(item) for item in items]
+    except redis.RedisError as e:
+        logger.warning("Redis cache read error during streaming: %s", e)
+
+
+def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
+    """Append a batch of features to the cache list and refresh its TTL."""
+    if not features:
+        return
+    try:
+        key = make_cache_key(query_params)
+        with _get_redis_client().pipeline() as pipeline:
+            # One RPUSH carrying every value instead of one command per feature
+            pipeline.rpush(key, *(json.dumps(feature) for feature in features))
+            pipeline.expire(key, CACHE_TTL_SECONDS)
+            pipeline.execute()
+    except redis.RedisError as e:
+        logger.warning("Redis cache write error: %s", e)
+
+
+def invalidate_cache() -> None:
+    """Delete all listing GeoJSON cache entries."""
+    try:
+        client = _get_redis_client()
+        cursor = 0
+        deleted = 0
+        while True:
+            cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
+            if keys:
+                client.delete(*keys)
+                deleted += len(keys)
+            if cursor == 0:
+                break
+        if deleted:
+            logger.info("Invalidated %d listing cache entries", deleted)
+    except redis.RedisError as e:
+        logger.warning("Redis cache invalidation error: %s", e)
diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py
index 713a56d..60bf2e6 100644
--- a/crawler/tasks/listing_tasks.py
+++ b/crawler/tasks/listing_tasks.py
@@ -16,6 +16,7 @@ from repositories.listing_repository import ListingRepository
 from database import engine
 from services.query_splitter import QuerySplitter, SubQuery
 from utils.redis_lock import redis_lock
+from services.listing_cache import invalidate_cache
 
 logger = logging.getLogger("uvicorn.error")
 
@@ -88,6 +89,7 @@ async def dump_listings_full(
     if len(ids_to_process) == 0:
         elapsed = time.time() - start_time
         celery_logger.info(f"No new listings found. Completed in {elapsed:.1f}s")
+        invalidate_cache()
         task.update_state(
             state="No new listings found",
             meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"},
@@ -111,6 +113,8 @@ async def dump_listings_full(
     celery_logger.info(f"COMPLETED: Processed {len(result)} listings in {elapsed:.1f}s")
     celery_logger.info("=" * 60)
 
+    invalidate_cache()
+
     return result