From 75a9dbf035d6a33f5f9f14f75750186ac3398d7f Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 2 Feb 2026 21:57:45 +0000 Subject: [PATCH] Add intelligent query splitting to maximize Rightmove data extraction --- crawler/.env.sample | 9 + crawler/CLAUDE.md | 233 ++++++++++ crawler/config/__init__.py | 3 +- crawler/config/scraper_config.py | 65 +++ crawler/poetry.lock | 526 +++++++++++++++++++++- crawler/pyproject.toml | 31 +- crawler/rec/query.py | 190 +++++++- crawler/services/listing_fetcher.py | 146 ++++++ crawler/services/query_splitter.py | 303 +++++++++++++ crawler/tasks/listing_tasks.py | 203 +++++---- crawler/tests/unit/test_query_splitter.py | 374 +++++++++++++++ 11 files changed, 1970 insertions(+), 113 deletions(-) create mode 100644 crawler/CLAUDE.md create mode 100644 crawler/config/scraper_config.py create mode 100644 crawler/services/listing_fetcher.py create mode 100644 crawler/services/query_splitter.py create mode 100644 crawler/tests/unit/test_query_splitter.py diff --git a/crawler/.env.sample b/crawler/.env.sample index 1e7fe00..aa1c93d 100644 --- a/crawler/.env.sample +++ b/crawler/.env.sample @@ -7,6 +7,15 @@ export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLit export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks export CELERY_RESULT_BACKEND="redis://localhost:6379/1" +# Rightmove scraper configuration +# These settings control query splitting to work around Rightmove's ~1500 result cap +RIGHTMOVE_MAX_CONCURRENT=5 # Max concurrent HTTP requests +RIGHTMOVE_REQUEST_DELAY_MS=100 # Delay between requests in milliseconds +RIGHTMOVE_SPLIT_THRESHOLD=1200 # Split query when results exceed this threshold +RIGHTMOVE_MIN_PRICE_BAND=100 # Minimum price band width (won't split below this) +RIGHTMOVE_MAX_PAGES=60 # Max pages per subquery (60 * 25 = 1500 max results) +RIGHTMOVE_PROXY_URL= # Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor) + # Periodic scraping schedules (JSON array) # Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types # Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday) diff --git a/crawler/CLAUDE.md b/crawler/CLAUDE.md new file mode 100644 index 0000000..14714ab --- /dev/null +++ b/crawler/CLAUDE.md @@ -0,0 +1,233 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +A real estate listing crawler and aggregator that scrapes property listings from Rightmove UK, extracts square meter data from floorplan images using OCR, calculates transit routes, and provides a web UI for browsing listings. + +## Development Environment + +**IMPORTANT**: This project runs on a remote host, not locally. Always use the remote executor to run commands: + +- **All shell commands** (Python, pytest, poetry, alembic, etc.) must be executed via the remote executor +- **Starting the project**: Use the remote executor to run `./start.sh` +- **Running tests**: Use the remote executor to run `pytest` +- **Any CLI operations**: Use the remote executor to run `python main.py ...` + +Never run commands directly on the local machine - always route them through the remote executor. 
+ +## Commands + +### Setup and Run (Docker - Recommended) + +```bash +# Start all services (Redis, MySQL, API, Celery) with Docker +./start.sh + +# Rebuild images and start +./start.sh --build + +# Stop all containers +./start.sh --down + +# View logs +./start.sh --logs +``` + +### Setup and Run (Local with Poetry) + +```bash +# Install dependencies +poetry install && cp .env.sample .env + +# Start backend locally (requires Redis running) +./start.sh --local + +# Start frontend (from frontend/ directory) +cd frontend && ./start.sh +``` + +### CLI Operations + +The main CLI (`main.py`) uses Click with a `--data-dir` option (default: `data/rs/`): + +```bash +# Dump listings from Rightmove API +python main.py dump-listings --type rent --min-price 2000 --max-price 4000 --min-bedrooms 2 + +# Download floorplan images +python main.py dump-images + +# Extract square meters from floorplans using OCR +python main.py detect-floorplan + +# Calculate transit routes (consumes Google Maps API calls) +python main.py routing --destination-address 'Address' -m transit -l 10 + +# Export to GeoJSON for visualization +python main.py export-immoweb -O output.js --type rent [filter options] +``` + +### Testing + +```bash +# Run tests with coverage +pytest tests/ -v --cov=. --cov-report=term-missing + +# Run type checker +mypy . +``` + +### Database Migrations + +```bash +alembic upgrade head # Apply migrations +alembic revision -m "description" # Create new migration +``` + +### Code Formatting + +```bash +yapf --style .style.yapf --recursive . +``` + +## Architecture + +### Core Data Flow + +1. **Scraping** (`rec/query.py`): Fetches listing IDs and details from Rightmove's Android API +2. **Processing** (`listing_processor.py`): Pipeline with steps for fetching details, downloading images, and OCR detection +3. **Storage**: SQLModel/SQLAlchemy with MySQL or SQLite, plus JSON files in `data/rs//` +4. **API** (`api/app.py`): FastAPI endpoints authenticated via JWT from external Authentik service +5. **Background Tasks** (`tasks/listing_tasks.py`): Celery tasks for async listing processing with Redis broker + +### Key Models + +- `models/listing.py`: SQLModel entities (`RentListing`, `BuyListing`) with `QueryParameters` for filtering +- `data_access.py`: **DEPRECATED** - Legacy `Listing` dataclass for filesystem-based data access. Use `models.listing.RentListing` or `models.listing.BuyListing` instead. + +### Services Layer (Unified CLI and API) + +**IMPORTANT**: The `services/` directory contains unified handler functions that both the CLI and HTTP API use. This ensures consistency and code reuse. 
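+For example, the listings CLI command and the listings HTTP endpoint can both delegate to the same service function. This is a minimal sketch, not the project's actual wiring: `get_listings()` does live in `services/listing_service.py`, but its signature and the CLI option names shown here are assumptions for illustration.
+
+```python
+# Sketch: one shared service function behind both entry points.
+# The `listing_type` keyword argument is an assumption for illustration.
+import click
+from fastapi import FastAPI
+
+from services.listing_service import get_listings
+
+app = FastAPI()
+
+
+@click.command()
+@click.option("--type", "listing_type", default="rent")
+def listings_cmd(listing_type: str) -> None:
+    """CLI entry point: prints listings from the shared service."""
+    for listing in get_listings(listing_type=listing_type):
+        click.echo(listing)
+
+
+@app.get("/listings")
+def listings_endpoint(listing_type: str = "rent") -> list:
+    """HTTP entry point: returns the same data via the same service."""
+    return list(get_listings(listing_type=listing_type))
+```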
+ +#### High-level services (use these in CLI and API): +- **`listing_service.py`**: Listing operations + - `get_listings()` - Retrieve listings from database + - `refresh_listings()` - Fetch new listings from Rightmove (sync or async) + - `download_images()` - Download floorplan images + - `detect_floorplans()` - Run OCR on floorplans + - `calculate_routes()` - Calculate transit routes + +- **`export_service.py`**: Export operations + - `export_to_csv()` - Export listings to CSV file + - `export_to_geojson()` - Export listings to GeoJSON (file or in-memory) + +- **`district_service.py`**: District management + - `get_all_districts()` - Get district name → region ID mapping + - `get_district_names()` - Get list of district names + - `validate_districts()` - Validate district names + +- **`task_service.py`**: Background task management + - `get_task_status()` - Get Celery task status + - `get_user_tasks()` - Get all tasks for a user + - `add_task_for_user()` - Associate task with user + +#### Low-level services (internal implementation): +- `listing_fetcher.py`: Fetches listing data from Rightmove API +- `image_fetcher.py`: Downloads floorplan images +- `floorplan_detector.py`: OCR-based square meter detection +- `route_calculator.py`: Calculates transit routes using Google Maps API +- `query_splitter.py`: Intelligent query splitting to maximize data extraction + +### Query Splitting System + +Rightmove's API caps search results at ~1,500 listings per query. The query splitting system works around this limitation to fetch **all matching listings**. + +#### How it works: + +1. **Initial Split**: Queries are split by district and bedroom count +2. **Probe**: Each subquery is probed (minimal API request) to get `totalAvailableResults` +3. **Adaptive Split**: If results exceed threshold (1,200), the price range is binary-split +4. **Recursive Refinement**: Splitting continues until all subqueries are under threshold +5. **Full Fetch**: Each subquery fetches up to 60 pages (1,500 results max) + +``` +Original: 2BR, £1000-£5000 → 3,000 results (over cap!) + ↓ split by price +£1000-£3000: 1,800 (still over!) | £3000-£5000: 1,200 ✓ + ↓ split again +£1000-£2000: 900 ✓ | £2000-£3000: 900 ✓ + +Final: 3 subqueries → 900 + 900 + 1,200 = 3,000 total results ✓ +``` + +#### Key components: +- `config/scraper_config.py`: Configuration with env var loading +- `services/query_splitter.py`: `QuerySplitter` class with `SubQuery` dataclass +- `rec/query.py`: `probe_query()` for result count probing, `create_session()` for connection pooling + +### Processing Pipeline + +`ListingProcessor` runs sequential steps defined in `listing_processor.py`: +1. `FetchListingDetailsStep` - Get property details from API +2. `FetchImagesStep` - Download floorplan images +3. `DetectFloorplanStep` - OCR to extract square meters from floorplans + +### Floorplan OCR + +`rec/floorplan.py` uses pytesseract with image preprocessing (adaptive thresholding) to extract square meter values from floorplan images. + +### Repository Pattern + +`repositories/listing_repository.py` handles database operations with SQLModel sessions. 
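+
+### Query Splitting (Illustrative Sketch)
+
+The adaptive split described in the Query Splitting System above reduces to a short recursion. The sketch below is illustrative only: `SubQuery` is a simplified stand-in for the real dataclass in `services/query_splitter.py`, the `probe` callback stands in for `rec.query.probe_query()` (whose exact signature differs), and the real implementation also has to respect `max_concurrent_requests` and `request_delay_ms`, which the sketch ignores.
+
+```python
+from dataclasses import dataclass
+from typing import Callable
+
+from config import ScraperConfig
+
+
+@dataclass
+class SubQuery:
+    """Simplified stand-in for the real SubQuery dataclass."""
+    district: str
+    bedrooms: int
+    min_price: int
+    max_price: int
+
+
+def split(query: SubQuery, probe: Callable[[SubQuery], int],
+          config: ScraperConfig) -> list[SubQuery]:
+    """Bisect the price range until every subquery fits under the
+    split threshold, or the band is too narrow to split further."""
+    total = probe(query)  # stand-in for rec.query.probe_query()
+    band = query.max_price - query.min_price
+    if total <= config.split_threshold or band <= config.min_price_band:
+        return [query]
+    mid = query.min_price + band // 2
+    lower = SubQuery(query.district, query.bedrooms, query.min_price, mid)
+    upper = SubQuery(query.district, query.bedrooms, mid, query.max_price)
+    return split(lower, probe, config) + split(upper, probe, config)
+
+
+config = ScraperConfig.from_env()
+# split(SubQuery("E14", 2, 1000, 5000), probe_fn, config) would yield the
+# three subqueries from the diagram above, given the same probe counts.
+```
+
+Note that the split threshold (1,200) deliberately sits below Rightmove's ~1,500 cap: the margin leaves headroom so a subquery whose result count grows between the probe and the full fetch can still be fetched completely.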
+
+## Environment Variables
+
+- `DB_CONNECTION_STRING`: Database URL (SQLite default: `sqlite:///data/wrongmove.db`)
+- `CELERY_BROKER_URL` / `CELERY_RESULT_BACKEND`: Redis URLs
+- `ROUTING_API_KEY`: Google Maps API key for transit routing
+
+### Scraper Configuration
+
+These control the query splitting behavior (defaults live in `config/scraper_config.py`; see `.env.sample` for examples):
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `RIGHTMOVE_MAX_CONCURRENT` | 5 | Max concurrent HTTP requests |
+| `RIGHTMOVE_REQUEST_DELAY_MS` | 100 | Delay between requests (ms) |
+| `RIGHTMOVE_RESULT_CAP` | 1500 | Result cap Rightmove enforces per query |
+| `RIGHTMOVE_SPLIT_THRESHOLD` | 1200 | Split query when results exceed this |
+| `RIGHTMOVE_MIN_PRICE_BAND` | 100 | Minimum price band width (won't split below this) |
+| `RIGHTMOVE_MAX_PAGES` | 60 | Max pages per subquery (60 × 25 = 1500) |
+| `RIGHTMOVE_PROXY_URL` | - | SOCKS proxy URL (e.g., `socks5://localhost:9050` for Tor) |
+
+## Project Structure
+
+- `main.py`: CLI entry point
+- `api/`: FastAPI application with auth middleware
+- `config/`: Configuration modules (scraper settings, scheduled tasks)
+- `models/`: SQLModel database entities
+- `repositories/`: Database access layer
+- `rec/`: Core business logic (query, floorplan OCR, routing, districts)
+- `services/`: Service layer modules (listing_fetcher, image_fetcher, floorplan_detector, route_calculator, query_splitter)
+- `tasks/`: Celery background tasks
+- `frontend/`: React/Vite frontend with Caddy proxy
+- `alembic/`: Database migrations
+- `tests/`: Test suite (unit and integration tests)
+
+## Type Checking
+
+The project uses strict mypy configuration with `disallow_untyped_defs=true`. Run `mypy .` to check types.
+
+## Exploration Preferences
+
+- Always ignore `node_modules` directory when exploring the codebase
+
+## Git Workflow
+
+**IMPORTANT**: After completing work items, always create separate commits for each logical change:
+- Keep each commit focused on one feature/fix
+- Do not include unrelated files
+- Use descriptive commit messages
+- Group related files together (e.g., tests with the code they test)
+
diff --git a/crawler/config/__init__.py b/crawler/config/__init__.py
index 315e8c3..b82264c 100644
--- a/crawler/config/__init__.py
+++ b/crawler/config/__init__.py
@@ -1,4 +1,5 @@
 """Configuration modules."""
 from config.schedule_config import ScheduleConfig, SchedulesConfig
+from config.scraper_config import ScraperConfig
 
-__all__ = ["ScheduleConfig", "SchedulesConfig"]
+__all__ = ["ScheduleConfig", "SchedulesConfig", "ScraperConfig"]
diff --git a/crawler/config/scraper_config.py b/crawler/config/scraper_config.py
new file mode 100644
index 0000000..e84c1d5
--- /dev/null
+++ b/crawler/config/scraper_config.py
@@ -0,0 +1,65 @@
+"""Scraper configuration with environment variable loading."""
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Self
+
+
+@dataclass(frozen=True)
+class ScraperConfig:
+    """Configuration for the Rightmove scraper.
+
+    Attributes:
+        max_concurrent_requests: Maximum number of concurrent HTTP requests.
+        request_delay_ms: Delay between requests in milliseconds.
+        result_cap: Maximum results Rightmove returns per query (their limit).
+        split_threshold: When results exceed this, split the query further.
+        min_price_band: Minimum width of a price band (won't split below this).
+        max_pages_per_query: Maximum pages to fetch per subquery (60 * 25 = 1500).
+        proxy_url: Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor).
+ """ + + max_concurrent_requests: int = 5 + request_delay_ms: int = 100 + result_cap: int = 1500 + split_threshold: int = 1200 # Split when approaching cap + min_price_band: int = 100 # Minimum band width in currency units + max_pages_per_query: int = 60 # 60 * 25 = 1500 results max + proxy_url: str | None = None + + @classmethod + def from_env(cls) -> Self: + """Load configuration from environment variables. + + Environment variables: + RIGHTMOVE_MAX_CONCURRENT: Max concurrent requests (default: 5) + RIGHTMOVE_REQUEST_DELAY_MS: Request delay in ms (default: 100) + RIGHTMOVE_RESULT_CAP: Result cap per query (default: 1500) + RIGHTMOVE_SPLIT_THRESHOLD: Split threshold (default: 1200) + RIGHTMOVE_MIN_PRICE_BAND: Minimum price band width (default: 100) + RIGHTMOVE_MAX_PAGES: Max pages per query (default: 60) + RIGHTMOVE_PROXY_URL: SOCKS proxy URL (default: None) + + Returns: + ScraperConfig instance with values from environment or defaults. + """ + return cls( + max_concurrent_requests=int( + os.environ.get("RIGHTMOVE_MAX_CONCURRENT", "5") + ), + request_delay_ms=int( + os.environ.get("RIGHTMOVE_REQUEST_DELAY_MS", "100") + ), + result_cap=int(os.environ.get("RIGHTMOVE_RESULT_CAP", "1500")), + split_threshold=int( + os.environ.get("RIGHTMOVE_SPLIT_THRESHOLD", "1200") + ), + min_price_band=int( + os.environ.get("RIGHTMOVE_MIN_PRICE_BAND", "100") + ), + max_pages_per_query=int( + os.environ.get("RIGHTMOVE_MAX_PAGES", "60") + ), + proxy_url=os.environ.get("RIGHTMOVE_PROXY_URL") or None, + ) diff --git a/crawler/poetry.lock b/crawler/poetry.lock index 87eabca..c822ef9 100644 --- a/crawler/poetry.lock +++ b/crawler/poetry.lock @@ -6,7 +6,7 @@ version = "2.6.1" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -18,7 +18,7 @@ version = "3.12.13" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5421af8f22a98f640261ee48aae3a37f0c41371e99412d55eaf2f8a46d5dad29"}, {file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fcda86f6cb318ba36ed8f1396a6a4a3fd8f856f84d426584392083d10da4de0"}, @@ -120,13 +120,29 @@ yarl = ">=1.17.0,<2.0" [package.extras] speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "brotlicffi ; platform_python_implementation != \"CPython\""] +[[package]] +name = "aioresponses" +version = "0.7.8" +description = "Mock out requests made by ClientSession from aiohttp package" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "aioresponses-0.7.8-py2.py3-none-any.whl", hash = "sha256:b73bd4400d978855e55004b23a3a84cb0f018183bcf066a85ad392800b5b9a94"}, + {file = "aioresponses-0.7.8.tar.gz", hash = "sha256:b861cdfe5dc58f3b8afac7b0a6973d5d7b2cb608dd0f6253d16b8ee8eaf6df11"}, +] + +[package.dependencies] +aiohttp = ">=3.3.0,<4.0.0" +packaging = ">=22.0" + [[package]] name = "aiosignal" version = "1.3.2" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = 
"aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, @@ -400,7 +416,7 @@ version = "5.0.1" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, @@ -932,13 +948,118 @@ mypy = ["bokeh", "contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.15.0)", " test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] +[[package]] +name = "coverage" +version = "7.13.2" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b"}, + {file = "coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2"}, + {file = "coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896"}, + {file = "coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c"}, + {file = "coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc"}, + {file = "coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5"}, + {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31"}, + {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad"}, + {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f"}, + {file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8"}, + {file = "coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c"}, + {file = "coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99"}, + {file = "coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e"}, + {file = "coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e"}, + {file = "coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508"}, + {file = 
"coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b"}, + {file = "coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b"}, + {file = "coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f"}, + {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3"}, + {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b"}, + {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1"}, + {file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059"}, + {file = "coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031"}, + {file = "coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e"}, + {file = "coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28"}, + {file = "coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d"}, + {file = "coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3"}, + {file = "coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99"}, + {file = "coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f"}, + {file = "coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f"}, + {file = "coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa"}, + {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce"}, + {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94"}, + {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5"}, + {file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b"}, + {file = "coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41"}, + {file = "coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e"}, + {file = "coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = 
"sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894"}, + {file = "coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6"}, + {file = "coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc"}, + {file = "coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f"}, + {file = "coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1"}, + {file = "coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9"}, + {file = "coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c"}, + {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5"}, + {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4"}, + {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c"}, + {file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31"}, + {file = "coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8"}, + {file = "coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb"}, + {file = "coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557"}, + {file = "coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e"}, + {file = "coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7"}, + {file = "coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3"}, + {file = "coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3"}, + {file = "coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421"}, + {file = "coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5"}, + {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23"}, + {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c"}, + {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = 
"sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f"}, + {file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573"}, + {file = "coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343"}, + {file = "coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47"}, + {file = "coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7"}, + {file = "coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef"}, + {file = "coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f"}, + {file = "coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5"}, + {file = "coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4"}, + {file = "coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27"}, + {file = "coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548"}, + {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660"}, + {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92"}, + {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82"}, + {file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892"}, + {file = "coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe"}, + {file = "coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859"}, + {file = "coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6"}, + {file = "coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b"}, + {file = "coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417"}, + {file = "coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee"}, + {file = "coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1"}, + {file = "coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d"}, + 
{file = "coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6"}, + {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a"}, + {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04"}, + {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f"}, + {file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f"}, + {file = "coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3"}, + {file = "coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba"}, + {file = "coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c"}, + {file = "coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5"}, + {file = "coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3"}, +] + +[package.extras] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] + [[package]] name = "cryptography" version = "45.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = "!=3.9.0,!=3.9.1,>=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "cryptography-45.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:425a9a6ac2823ee6e46a76a21a4e8342d8fa5c01e08b823c1f19a8b74f096069"}, {file = "cryptography-45.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:680806cf63baa0039b920f4976f5f31b10e772de42f16310a6839d9f21a26b0d"}, @@ -1132,6 +1253,30 @@ files = [ [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] +[[package]] +name = "fakeredis" +version = "2.33.0" +description = "Python implementation of redis API, can be used for testing purposes." 
+optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965"}, + {file = "fakeredis-2.33.0.tar.gz", hash = "sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770"}, +] + +[package.dependencies] +redis = {version = ">=4.3", markers = "python_version > \"3.8\""} +sortedcontainers = ">=2" + +[package.extras] +bf = ["pyprobables (>=0.6)"] +cf = ["pyprobables (>=0.6)"] +json = ["jsonpath-ng (>=1.6)"] +lua = ["lupa (>=2.1)"] +probabilistic = ["pyprobables (>=0.6)"] +valkey = ["valkey (>=6) ; python_version >= \"3.8\""] + [[package]] name = "fastapi" version = "0.115.13" @@ -1309,7 +1454,7 @@ version = "1.7.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"}, {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"}, @@ -1676,14 +1821,14 @@ test = ["Cython (>=0.29.24)"] [[package]] name = "httpx" -version = "0.28.1" +version = "0.27.2" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, - {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, ] [package.dependencies] @@ -1691,6 +1836,7 @@ anyio = "*" certifi = "*" httpcore = "==1.*" idna = "*" +sniffio = "*" [package.extras] brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] @@ -1777,6 +1923,18 @@ perf = ["ipython"] test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] type = ["pytest-mypy"] +[[package]] +name = "iniconfig" +version = "2.3.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"}, + {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, +] + [[package]] name = "ipdb" version = "0.13.13" @@ -2360,6 +2518,93 @@ interegular = ["interegular (>=0.3.1,<0.4.0)"] nearley = ["js2py"] regex = ["regex"] +[[package]] +name = "librt" +version = "0.7.8" +description = "Mypyc runtime library" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +markers = "platform_python_implementation != \"PyPy\"" +files = [ + {file = "librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d"}, + {file = "librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b"}, + {file = "librt-0.7.8-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6938cc2de153bc927ed8d71c7d2f2ae01b4e96359126c602721340eb7ce1a92d"}, + {file = "librt-0.7.8-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:66daa6ac5de4288a5bbfbe55b4caa7bf0cd26b3269c7a476ffe8ce45f837f87d"}, + {file = "librt-0.7.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4864045f49dc9c974dadb942ac56a74cd0479a2aafa51ce272c490a82322ea3c"}, + {file = "librt-0.7.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a36515b1328dc5b3ffce79fe204985ca8572525452eacabee2166f44bb387b2c"}, + {file = "librt-0.7.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b7e7f140c5169798f90b80d6e607ed2ba5059784968a004107c88ad61fb3641d"}, + {file = "librt-0.7.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff71447cb778a4f772ddc4ce360e6ba9c95527ed84a52096bd1bbf9fee2ec7c0"}, + {file = "librt-0.7.8-cp310-cp310-win32.whl", hash = "sha256:047164e5f68b7a8ebdf9fae91a3c2161d3192418aadd61ddd3a86a56cbe3dc85"}, + {file = "librt-0.7.8-cp310-cp310-win_amd64.whl", hash = "sha256:d6f254d096d84156a46a84861183c183d30734e52383602443292644d895047c"}, + {file = "librt-0.7.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3e9c11aa260c31493d4b3197d1e28dd07768594a4f92bec4506849d736248f"}, + {file = "librt-0.7.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddb52499d0b3ed4aa88746aaf6f36a08314677d5c346234c3987ddc506404eac"}, + {file = "librt-0.7.8-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e9c0afebbe6ce177ae8edba0c7c4d626f2a0fc12c33bb993d163817c41a7a05c"}, + {file = "librt-0.7.8-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:631599598e2c76ded400c0a8722dec09217c89ff64dc54b060f598ed68e7d2a8"}, + {file = "librt-0.7.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c1ba843ae20db09b9d5c80475376168feb2640ce91cd9906414f23cc267a1ff"}, + {file = "librt-0.7.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b5b007bb22ea4b255d3ee39dfd06d12534de2fcc3438567d9f48cdaf67ae1ae3"}, + {file = "librt-0.7.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dbd79caaf77a3f590cbe32dc2447f718772d6eea59656a7dcb9311161b10fa75"}, + {file = "librt-0.7.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:87808a8d1e0bd62a01cafc41f0fd6818b5a5d0ca0d8a55326a81643cdda8f873"}, + {file = "librt-0.7.8-cp311-cp311-win32.whl", hash = "sha256:31724b93baa91512bd0a376e7cf0b59d8b631ee17923b1218a65456fa9bda2e7"}, + {file = "librt-0.7.8-cp311-cp311-win_amd64.whl", hash = "sha256:978e8b5f13e52cf23a9e80f3286d7546baa70bc4ef35b51d97a709d0b28e537c"}, + {file = "librt-0.7.8-cp311-cp311-win_arm64.whl", hash = "sha256:20e3946863d872f7cabf7f77c6c9d370b8b3d74333d3a32471c50d3a86c0a232"}, + {file = "librt-0.7.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b6943885b2d49c48d0cff23b16be830ba46b0152d98f62de49e735c6e655a63"}, + {file = "librt-0.7.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46ef1f4b9b6cc364b11eea0ecc0897314447a66029ee1e55859acb3dd8757c93"}, + {file = "librt-0.7.8-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:907ad09cfab21e3c86e8f1f87858f7049d1097f77196959c033612f532b4e592"}, + {file = 
"librt-0.7.8-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2991b6c3775383752b3ca0204842743256f3ad3deeb1d0adc227d56b78a9a850"}, + {file = "librt-0.7.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03679b9856932b8c8f674e87aa3c55ea11c9274301f76ae8dc4d281bda55cf62"}, + {file = "librt-0.7.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3968762fec1b2ad34ce57458b6de25dbb4142713e9ca6279a0d352fa4e9f452b"}, + {file = "librt-0.7.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb7a7807523a31f03061288cc4ffc065d684c39db7644c676b47d89553c0d714"}, + {file = "librt-0.7.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad64a14b1e56e702e19b24aae108f18ad1bf7777f3af5fcd39f87d0c5a814449"}, + {file = "librt-0.7.8-cp312-cp312-win32.whl", hash = "sha256:0241a6ed65e6666236ea78203a73d800dbed896cf12ae25d026d75dc1fcd1dac"}, + {file = "librt-0.7.8-cp312-cp312-win_amd64.whl", hash = "sha256:6db5faf064b5bab9675c32a873436b31e01d66ca6984c6f7f92621656033a708"}, + {file = "librt-0.7.8-cp312-cp312-win_arm64.whl", hash = "sha256:57175aa93f804d2c08d2edb7213e09276bd49097611aefc37e3fa38d1fb99ad0"}, + {file = "librt-0.7.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4c3995abbbb60b3c129490fa985dfe6cac11d88fc3c36eeb4fb1449efbbb04fc"}, + {file = "librt-0.7.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:44e0c2cbc9bebd074cf2cdbe472ca185e824be4e74b1c63a8e934cea674bebf2"}, + {file = "librt-0.7.8-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4d2f1e492cae964b3463a03dc77a7fe8742f7855d7258c7643f0ee32b6651dd3"}, + {file = "librt-0.7.8-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:451e7ffcef8f785831fdb791bd69211f47e95dc4c6ddff68e589058806f044c6"}, + {file = "librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3469e1af9f1380e093ae06bedcbdd11e407ac0b303a56bbe9afb1d6824d4982d"}, + {file = "librt-0.7.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f11b300027ce19a34f6d24ebb0a25fd0e24a9d53353225a5c1e6cadbf2916b2e"}, + {file = "librt-0.7.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4adc73614f0d3c97874f02f2c7fd2a27854e7e24ad532ea6b965459c5b757eca"}, + {file = "librt-0.7.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60c299e555f87e4c01b2eca085dfccda1dde87f5a604bb45c2906b8305819a93"}, + {file = "librt-0.7.8-cp313-cp313-win32.whl", hash = "sha256:b09c52ed43a461994716082ee7d87618096851319bf695d57ec123f2ab708951"}, + {file = "librt-0.7.8-cp313-cp313-win_amd64.whl", hash = "sha256:f8f4a901a3fa28969d6e4519deceab56c55a09d691ea7b12ca830e2fa3461e34"}, + {file = "librt-0.7.8-cp313-cp313-win_arm64.whl", hash = "sha256:43d4e71b50763fcdcf64725ac680d8cfa1706c928b844794a7aa0fa9ac8e5f09"}, + {file = "librt-0.7.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:be927c3c94c74b05128089a955fba86501c3b544d1d300282cc1b4bd370cb418"}, + {file = "librt-0.7.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7b0803e9008c62a7ef79058233db7ff6f37a9933b8f2573c05b07ddafa226611"}, + {file = "librt-0.7.8-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:79feb4d00b2a4e0e05c9c56df707934f41fcb5fe53fd9efb7549068d0495b758"}, + {file = "librt-0.7.8-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9122094e3f24aa759c38f46bd8863433820654927370250f460ae75488b66ea"}, + {file = 
"librt-0.7.8-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e03bea66af33c95ce3addf87a9bf1fcad8d33e757bc479957ddbc0e4f7207ac"}, + {file = "librt-0.7.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f1ade7f31675db00b514b98f9ab9a7698c7282dad4be7492589109471852d398"}, + {file = "librt-0.7.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a14229ac62adcf1b90a15992f1ab9c69ae8b99ffb23cb64a90878a6e8a2f5b81"}, + {file = "librt-0.7.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5bcaaf624fd24e6a0cb14beac37677f90793a96864c67c064a91458611446e83"}, + {file = "librt-0.7.8-cp314-cp314-win32.whl", hash = "sha256:7aa7d5457b6c542ecaed79cec4ad98534373c9757383973e638ccced0f11f46d"}, + {file = "librt-0.7.8-cp314-cp314-win_amd64.whl", hash = "sha256:3d1322800771bee4a91f3b4bd4e49abc7d35e65166821086e5afd1e6c0d9be44"}, + {file = "librt-0.7.8-cp314-cp314-win_arm64.whl", hash = "sha256:5363427bc6a8c3b1719f8f3845ea53553d301382928a86e8fab7984426949bce"}, + {file = "librt-0.7.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ca916919793a77e4a98d4a1701e345d337ce53be4a16620f063191f7322ac80f"}, + {file = "librt-0.7.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:54feb7b4f2f6706bb82325e836a01be805770443e2400f706e824e91f6441dde"}, + {file = "librt-0.7.8-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:39a4c76fee41007070f872b648cc2f711f9abf9a13d0c7162478043377b52c8e"}, + {file = "librt-0.7.8-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac9c8a458245c7de80bc1b9765b177055efff5803f08e548dd4bb9ab9a8d789b"}, + {file = "librt-0.7.8-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b67aa7eff150f075fda09d11f6bfb26edffd300f6ab1666759547581e8f666"}, + {file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:535929b6eff670c593c34ff435d5440c3096f20fa72d63444608a5aef64dd581"}, + {file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:63937bd0f4d1cb56653dc7ae900d6c52c41f0015e25aaf9902481ee79943b33a"}, + {file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf243da9e42d914036fd362ac3fa77d80a41cadcd11ad789b1b5eec4daaf67ca"}, + {file = "librt-0.7.8-cp314-cp314t-win32.whl", hash = "sha256:171ca3a0a06c643bd0a2f62a8944e1902c94aa8e5da4db1ea9a8daf872685365"}, + {file = "librt-0.7.8-cp314-cp314t-win_amd64.whl", hash = "sha256:445b7304145e24c60288a2f172b5ce2ca35c0f81605f5299f3fa567e189d2e32"}, + {file = "librt-0.7.8-cp314-cp314t-win_arm64.whl", hash = "sha256:8766ece9de08527deabcd7cb1b4f1a967a385d26e33e536d6d8913db6ef74f06"}, + {file = "librt-0.7.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c7e8f88f79308d86d8f39c491773cbb533d6cb7fa6476f35d711076ee04fceb6"}, + {file = "librt-0.7.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:389bd25a0db916e1d6bcb014f11aa9676cedaa485e9ec3752dfe19f196fd377b"}, + {file = "librt-0.7.8-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73fd300f501a052f2ba52ede721232212f3b06503fa12665408ecfc9d8fd149c"}, + {file = "librt-0.7.8-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d772edc6a5f7835635c7562f6688e031f0b97e31d538412a852c49c9a6c92d5"}, + {file = "librt-0.7.8-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde8a130bd0f239e45503ab39fab239ace094d63ee1d6b67c25a63d741c0f71"}, + {file = 
"librt-0.7.8-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fdec6e2368ae4f796fc72fad7fd4bd1753715187e6d870932b0904609e7c878e"}, + {file = "librt-0.7.8-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:00105e7d541a8f2ee5be52caacea98a005e0478cfe78c8080fbb7b5d2b340c63"}, + {file = "librt-0.7.8-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c6f8947d3dfd7f91066c5b4385812c18be26c9d5a99ca56667547f2c39149d94"}, + {file = "librt-0.7.8-cp39-cp39-win32.whl", hash = "sha256:41d7bb1e07916aeb12ae4a44e3025db3691c4149ab788d0315781b4d29b86afb"}, + {file = "librt-0.7.8-cp39-cp39-win_amd64.whl", hash = "sha256:e90a8e237753c83b8e484d478d9a996dc5e39fd5bd4c6ce32563bc8123f132be"}, + {file = "librt-0.7.8.tar.gz", hash = "sha256:1a4ede613941d9c3470b0368be851df6bb78ab218635512d0370b27a277a0862"}, +] + [[package]] name = "mako" version = "1.3.10" @@ -2595,7 +2840,7 @@ version = "6.5.0" description = "multidict implementation" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "multidict-6.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e118a202904623b1d2606d1c8614e14c9444b59d64454b0c355044058066469"}, {file = "multidict-6.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a42995bdcaff4e22cb1280ae7752c3ed3fbb398090c6991a2797a4a0e5ed16a9"}, @@ -2709,6 +2954,79 @@ files = [ {file = "multidict-6.5.0.tar.gz", hash = "sha256:942bd8002492ba819426a8d7aefde3189c1b87099cdf18aaaefefcf7f3f7b6d2"}, ] +[[package]] +name = "mypy" +version = "1.19.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec"}, + {file = "mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b"}, + {file = "mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6"}, + {file = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"}, + {file = "mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1"}, + {file = "mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac"}, + {file = "mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288"}, + {file = "mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab"}, + {file = "mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6"}, + {file = "mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331"}, + {file = "mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925"}, + {file = "mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042"}, + {file = 
"mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1"}, + {file = "mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e"}, + {file = "mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2"}, + {file = "mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8"}, + {file = "mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a"}, + {file = "mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13"}, + {file = "mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250"}, + {file = "mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b"}, + {file = "mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e"}, + {file = "mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef"}, + {file = "mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75"}, + {file = "mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd"}, + {file = "mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1"}, + {file = "mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718"}, + {file = "mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b"}, + {file = "mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045"}, + {file = "mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957"}, + {file = "mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f"}, + {file = "mypy-1.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bcfc336a03a1aaa26dfce9fff3e287a3ba99872a157561cbfcebe67c13308e3"}, + {file = "mypy-1.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b7951a701c07ea584c4fe327834b92a30825514c868b1f69c30445093fdd9d5a"}, + {file = "mypy-1.19.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b13cfdd6c87fc3efb69ea4ec18ef79c74c3f98b4e5498ca9b85ab3b2c2329a67"}, + {file = "mypy-1.19.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f28f99c824ecebcdaa2e55d82953e38ff60ee5ec938476796636b86afa3956e"}, + {file = "mypy-1.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:c608937067d2fc5a4dd1a5ce92fd9e1398691b8c5d012d66e1ddd430e9244376"}, + {file = "mypy-1.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:409088884802d511ee52ca067707b90c883426bd95514e8cfda8281dc2effe24"}, + {file = "mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247"}, + {file = "mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba"}, +] + +[package.dependencies] +librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""} +mypy_extensions = ">=1.0.0" +pathspec = ">=0.9.0" +typing_extensions = ">=4.6.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, +] + [[package]] name = "mysqlclient" version = "2.2.7" @@ -3226,6 +3544,24 @@ files = [ qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["docopt", "pytest"] +[[package]] +name = "pathspec" +version = "1.0.4" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"}, + {file = "pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645"}, +] + +[package.extras] +hyperscan = ["hyperscan (>=0.7)"] +optional = ["typing-extensions (>=4)"] +re2 = ["google-re2 (>=1.1)"] +tests = ["pytest (>=9)", "typing-extensions (>=4.15)"] + [[package]] name = "pexpect" version = "4.9.0" @@ -3357,6 +3693,22 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.14.1)"] +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + [[package]] name = "podman-compose" version = "1.5.0" @@ -3412,7 +3764,7 @@ version = "0.3.2" description = "Accelerated property cache" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"}, {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"}, @@ -3777,6 +4129,66 @@ files = [ packaging = ">=21.3" Pillow = ">=8.0.0" 
+[[package]] +name = "pytest" +version = "8.4.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.23.8" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2"}, + {file = "pytest_asyncio-0.23.8.tar.gz", hash = "sha256:759b10b33a6dc61cce40a8bd5205e302978bbbcc00e279a8b61d9a6a3c82e4d3"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + +[[package]] +name = "pytest-cov" +version = "4.1.0" +description = "Pytest plugin for measuring coverage." +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, + {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -4069,7 +4481,7 @@ version = "6.2.0" description = "Python client for Redis database and key-value store" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "redis-6.2.0-py3-none-any.whl", hash = "sha256:c8ddf316ee0aab65f04a11229e94a64b2618451dab7a67cb2f77eb799d872d5e"}, {file = "redis-6.2.0.tar.gz", hash = "sha256:e821f129b75dde6cb99dd35e5c76e8c49512a5a0d8dfdc560b2fbd44b85ca977"}, @@ -4603,6 +5015,18 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + [[package]] name = "soupsieve" version = "2.8" @@ -5005,6 +5429,37 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-cffi" +version = "1.17.0.20250915" +description = "Typing stubs for cffi" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "types_cffi-1.17.0.20250915-py3-none-any.whl", hash = 
"sha256:cef4af1116c83359c11bb4269283c50f0688e9fc1d7f0eeb390f3661546da52c"}, + {file = "types_cffi-1.17.0.20250915.tar.gz", hash = "sha256:4362e20368f78dabd5c56bca8004752cc890e07a71605d9e0d9e069dbaac8c06"}, +] + +[package.dependencies] +types-setuptools = "*" + +[[package]] +name = "types-pyopenssl" +version = "24.1.0.20240722" +description = "Typing stubs for pyOpenSSL" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"}, + {file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-cffi = "*" + [[package]] name = "types-python-dateutil" version = "2.9.0.20250822" @@ -5017,6 +5472,49 @@ files = [ {file = "types_python_dateutil-2.9.0.20250822.tar.gz", hash = "sha256:84c92c34bd8e68b117bff742bc00b692a1e8531262d4507b33afcc9f7716cd53"}, ] +[[package]] +name = "types-redis" +version = "4.6.0.20241004" +description = "Typing stubs for redis" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"}, + {file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" + +[[package]] +name = "types-requests" +version = "2.32.4.20260107" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "types_requests-2.32.4.20260107-py3-none-any.whl", hash = "sha256:b703fe72f8ce5b31ef031264fe9395cac8f46a04661a79f7ed31a80fb308730d"}, + {file = "types_requests-2.32.4.20260107.tar.gz", hash = "sha256:018a11ac158f801bfa84857ddec1650750e393df8a004a8a9ae2a9bec6fcb24f"}, +] + +[package.dependencies] +urllib3 = ">=2" + +[[package]] +name = "types-setuptools" +version = "80.10.0.20260124" +description = "Typing stubs for setuptools" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "types_setuptools-80.10.0.20260124-py3-none-any.whl", hash = "sha256:efed7e044f01adb9c2806c7a8e1b6aa3656b8e382379b53d5f26ee3db24d4c01"}, + {file = "types_setuptools-80.10.0.20260124.tar.gz", hash = "sha256:1b86d9f0368858663276a0cbe5fe5a9722caf94b5acde8aba0399a6e90680f20"}, +] + [[package]] name = "typing-extensions" version = "4.14.0" @@ -5569,7 +6067,7 @@ version = "1.20.1" description = "Yet another URL library" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"}, {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"}, @@ -5705,4 +6203,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">3.11" -content-hash = "5f53cec7fc3cc93d494341e9fd6562076c1a8952f83075f671a3507c50fcb334" +content-hash = "10a74594d9f695ab1077ff992bcd012b93b174b25c3f2ca681d6308653abbd14" diff --git a/crawler/pyproject.toml b/crawler/pyproject.toml index 7e2d1a1..4028cf9 100644 --- a/crawler/pyproject.toml +++ b/crawler/pyproject.toml @@ -20,6 +20,7 @@ matplotlib = "^3.10.0" 
opencv-python = "^4.11.0.86" click = "^8.2.0" aiohttp = "^3.11.18" +aiohttp-socks = "^0.8.4" sqlmodel = "^0.0.24" alembic = "^1.16.1" sqlalchemy = {extras = ["asyncio"], version = "^2.0.41"} @@ -42,6 +43,15 @@ mysqlclient = "^2.2.7" ipdb = "^0.13.13" jupyterlab = "^4.4.7" podman-compose = "^1.5.0" +pytest = "^8.0.0" +pytest-asyncio = "^0.23.0" +pytest-cov = "^4.1.0" +httpx = "^0.27.0" +aioresponses = "^0.7.6" +fakeredis = "^2.21.0" +mypy = "^1.8.0" +types-requests = "^2.31.0" +types-redis = "^4.6.0" [build-system] requires = ["poetry-core>=1.0.0"] @@ -52,4 +62,23 @@ build-backend = "poetry.core.masonry.api" lint.ignore = [ "E741", # Ambigious name ] -exclude = ["*.ipynb"] \ No newline at end of file +exclude = ["*.ipynb"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] +asyncio_default_fixture_loop_scope = "function" + +[tool.mypy] +python_version = "3.11" +warn_return_any = true +warn_unused_ignores = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +strict_optional = true +plugins = ["pydantic.mypy"] + +[[tool.mypy.overrides]] +module = ["transformers.*", "pytesseract.*", "cv2.*", "celery.*", "tqdm.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "geopy.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*"] +ignore_missing_imports = true \ No newline at end of file diff --git a/crawler/rec/query.py b/crawler/rec/query.py index b589876..a2526a6 100644 --- a/crawler/rec/query.py +++ b/crawler/rec/query.py @@ -1,16 +1,19 @@ import enum from typing import Any +from contextlib import asynccontextmanager +from collections.abc import AsyncIterator + import aiohttp from models.listing import FurnishType, ListingType from rec import districts from tenacity import retry, stop_after_attempt, wait_random +from config.scraper_config import ScraperConfig -headers = { +DEFAULT_HEADERS = { "Host": "api.rightmove.co.uk", - # 'Accept-Encoding': 'gzip, deflate, br', - "User-Agent": "okhttp/4.10.0", - "Connection": "close", + "User-Agent": "okhttp/4.12.0", + "Connection": "keep-alive", } @@ -24,15 +27,66 @@ class PropertyType(enum.StrEnum): TERRACED = "terraced" +@asynccontextmanager +async def create_session( + config: ScraperConfig | None = None, +) -> AsyncIterator[aiohttp.ClientSession]: + """Create an aiohttp session with optional proxy support. + + Args: + config: Scraper configuration. Loads from environment if not provided. + + Yields: + Configured aiohttp ClientSession. + """ + if config is None: + config = ScraperConfig.from_env() + + connector = None + if config.proxy_url: + try: + from aiohttp_socks import ProxyConnector + + connector = ProxyConnector.from_url(config.proxy_url) + except ImportError: + raise ImportError( + "aiohttp-socks is required for proxy support. " + "Install with: pip install aiohttp-socks" + ) + + session = aiohttp.ClientSession( + trust_env=True, + connector=connector, + headers=DEFAULT_HEADERS, + ) + try: + yield session + finally: + await session.close() + + @retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3)) -async def detail_query(detail_id: int) -> dict[str, Any]: +async def detail_query( + detail_id: int, + session: aiohttp.ClientSession | None = None, +) -> dict[str, Any]: + """Fetch detailed property information. + + Args: + detail_id: The property identifier. + session: Optional aiohttp session. Creates new one if not provided. + + Returns: + Property details as a dictionary. 
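+
+    Example (illustrative only; the property id below is made up):
+        async with create_session() as session:
+            details = await detail_query(123456789, session=session)  # hypothetical id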
+ """ params = { "apiApplication": "ANDROID", "appVersion": "3.70.0", } url = f"https://api.rightmove.co.uk/api/property/{detail_id}" - async with aiohttp.ClientSession(trust_env=True) as session: - async with session.get(url, params=params, headers=headers) as response: + + async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]: + async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response: if response.status != 200: raise Exception( f"""id: {detail_id}. Status Code: {response.status}.""" @@ -40,6 +94,12 @@ async def detail_query(detail_id: int) -> dict[str, Any]: ) return await response.json() + if session: + return await do_request(session) + else: + async with aiohttp.ClientSession(trust_env=True) as new_session: + return await do_request(new_session) + @retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3)) async def listing_query( @@ -57,7 +117,29 @@ async def listing_query( property_type: list[PropertyType] = [], page_size: int = 25, furnish_types: list[FurnishType] = [], + session: aiohttp.ClientSession | None = None, ) -> dict[str, Any]: + """Execute a listing search query. + + Args: + page: Page number to fetch (1-indexed). + channel: Listing type (BUY or RENT). + min_bedrooms: Minimum number of bedrooms. + max_bedrooms: Maximum number of bedrooms. + radius: Search radius. + min_price: Minimum price. + max_price: Maximum price. + district: District identifier string. + mustNewHome: Filter for new homes only (BUY only). + max_days_since_added: Maximum days since listing was added (BUY only). + property_type: List of property types to filter (BUY only). + page_size: Number of results per page (default 25). + furnish_types: List of furnish types to filter (RENT only). + session: Optional aiohttp session. Creates new one if not provided. + + Returns: + API response as a dictionary. + """ params: dict[str, str] = { "locationIdentifier": districts.get_districts()[district], "channel": str(channel).upper(), @@ -95,19 +177,105 @@ async def listing_query( if furnish_types: params["furnishTypes"] = ",".join(furnish_types) - headers = { + request_headers = { "Host": "api.rightmove.co.uk", "Accept-Encoding": "gzip, deflate, br", "User-Agent": "okhttp/4.12.0", "Connection": "keep-alive", } - async with aiohttp.ClientSession(trust_env=True) as session: - async with session.get( + async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]: + async with s.get( "https://api.rightmove.co.uk/api/property-listing", params=params, - headers=headers, + headers=request_headers, ) as response: if response.status != 200: raise Exception(f"Failed due to: {await response.text()}") return await response.json() + + if session: + return await do_request(session) + else: + async with aiohttp.ClientSession(trust_env=True) as new_session: + return await do_request(new_session) + + +@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3)) +async def probe_query( + *, + session: aiohttp.ClientSession, + channel: ListingType, + min_bedrooms: int, + max_bedrooms: int, + radius: float, + min_price: int, + max_price: int, + district: str, + max_days_since_added: int = 30, + furnish_types: list[FurnishType] = [], +) -> dict[str, Any]: + """Probe the API to get result count without fetching full results. + + Makes a minimal request (page_size=1) to efficiently get totalAvailableResults. + + Args: + session: aiohttp session for making requests. + channel: Listing type (BUY or RENT). + min_bedrooms: Minimum number of bedrooms. + max_bedrooms: Maximum number of bedrooms. 
+        radius: Search radius.
+        min_price: Minimum price.
+        max_price: Maximum price.
+        district: District identifier string.
+        max_days_since_added: Maximum days since listing was added (BUY only).
+        furnish_types: List of furnish types to filter (RENT only).
+
+    Returns:
+        API response containing totalAvailableResults.
+    """
+    params: dict[str, str] = {
+        "locationIdentifier": districts.get_districts()[district],
+        "channel": str(channel).upper(),
+        "page": "1",
+        "numberOfPropertiesPerPage": "1",  # Minimal page size for probing
+        "radius": str(radius),
+        "sortBy": "distance",
+        "includeUnavailableProperties": "false",
+        "minPrice": str(min_price),
+        "maxPrice": str(max_price),
+        "minBedrooms": str(min_bedrooms),
+        "maxBedrooms": str(max_bedrooms),
+        "apiApplication": "ANDROID",
+        "appVersion": "4.28.0",
+    }
+
+    if channel is ListingType.BUY:
+        params["dontShow"] = "sharedOwnership,retirement"
+        if max_days_since_added is not None and max_days_since_added in [
+            1,
+            3,
+            7,
+            14,
+        ]:
+            params["maxDaysSinceAdded"] = str(max_days_since_added)
+
+    if channel is ListingType.RENT:
+        if furnish_types:
+            params["furnishTypes"] = ",".join(furnish_types)
+
+    request_headers = {
+        "Host": "api.rightmove.co.uk",
+        "Accept-Encoding": "gzip, deflate, br",
+        "User-Agent": "okhttp/4.12.0",
+        "Connection": "keep-alive",
+    }
+
+    async with session.get(
+        "https://api.rightmove.co.uk/api/property-listing",
+        params=params,
+        headers=request_headers,
+    ) as response:
+        if response.status != 200:
+            raise Exception(f"Probe failed: {await response.text()}")
+        return await response.json()
diff --git a/crawler/services/listing_fetcher.py b/crawler/services/listing_fetcher.py
new file mode 100644
index 0000000..a94f3e0
--- /dev/null
+++ b/crawler/services/listing_fetcher.py
@@ -0,0 +1,146 @@
+"""Listing fetcher service - fetches listing data from Rightmove API."""
+import asyncio
+import logging
+from typing import Any
+
+from config.scraper_config import ScraperConfig
+from listing_processor import ListingProcessor
+from rec.query import create_session, listing_query
+from models.listing import QueryParameters
+from repositories import ListingRepository
+from tqdm.asyncio import tqdm
+from models import Listing as modelListing
+from services.query_splitter import QuerySplitter, SubQuery
+
+logger = logging.getLogger("uvicorn.error")
+
+
+async def dump_listings_full(
+    parameters: QueryParameters,
+    repository: ListingRepository,
+) -> list[modelListing]:
+    """Fetch all listings and their images, and detect floorplans."""
+    new_listings = await dump_listings(parameters, repository)
+    logger.debug(f"Upserted {len(new_listings)} new listings")
+    # Refresh listings from the database; compare by id, since
+    # dump_listings returns model objects rather than raw ids.
+    new_ids = {listing.id for listing in new_listings}
+    listings = await repository.get_listings(parameters)  # this can be better
+    new_listings = [x for x in listings if x.id in new_ids]
+    return new_listings
+
+
+async def dump_listings(
+    parameters: QueryParameters,
+    repository: ListingRepository,
+) -> list[modelListing]:
+    """Fetch listings from Rightmove API and process them.
+
+    Uses intelligent query splitting to maximize data extraction
+    while respecting Rightmove's result caps.
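+
+    Example (illustrative; assumes `parameters` and `repository` were built by
+    the caller, e.g. the CLI or a Celery task):
+        new_listings = await dump_listings(parameters, repository)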
+ """ + config = ScraperConfig.from_env() + splitter = QuerySplitter(config) + + async with create_session(config) as session: + # Phase 1 & 2: Split and probe queries + logger.info("Splitting query and probing result counts...") + subqueries = await splitter.split(parameters, session) + + total_estimated = splitter.calculate_total_estimated_results(subqueries) + logger.info( + f"Split into {len(subqueries)} subqueries, " + f"estimated {total_estimated} total results" + ) + + # Phase 3: Fetch all pages for each subquery + semaphore = asyncio.Semaphore(config.max_concurrent_requests) + + async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]: + """Fetch all pages for a single subquery.""" + results: list[dict[str, Any]] = [] + + estimated = sq.estimated_results or 0 + if estimated == 0: + return results + + page_size = parameters.page_size + max_pages = min( + config.max_pages_per_query, + (estimated // page_size) + 1, + ) + + for page_id in range(1, max_pages + 1): + async with semaphore: + await asyncio.sleep(config.request_delay_ms / 1000) + try: + result = await listing_query( + page=page_id, + channel=parameters.listing_type, + min_bedrooms=sq.min_bedrooms, + max_bedrooms=sq.max_bedrooms, + radius=parameters.radius, + min_price=sq.min_price, + max_price=sq.max_price, + district=sq.district, + page_size=page_size, + max_days_since_added=parameters.max_days_since_added, + furnish_types=parameters.furnish_types or [], + session=session, + ) + results.append(result) + + properties = result.get("properties", []) + if len(properties) < page_size: + break + + except Exception as e: + if "GENERIC_ERROR" in str(e): + logger.debug( + f"Max page for {sq.district}: {page_id - 1}" + ) + break + logger.warning( + f"Error fetching page {page_id} for {sq.district}: {e}" + ) + break + + return results + + # Fetch all subqueries with progress bar + all_results = await tqdm.gather( + *[fetch_subquery(sq) for sq in subqueries], + desc="Fetching listings", + ) + + # Extract listing identifiers from results + listing_ids: list[int] = [] + for subquery_results in all_results: + for response_json in subquery_results: + if not response_json: + continue + if response_json.get("totalAvailableResults", 0) == 0: + continue + for property_data in response_json.get("properties", []): + identifier = property_data.get("identifier") + if identifier: + listing_ids.append(identifier) + + logger.info(f"Found {len(listing_ids)} total listings") + + # Deduplicate + unique_ids = list(set(listing_ids)) + logger.info(f"After deduplication: {len(unique_ids)} unique listings") + + # Filter out listings already in database + all_listing_ids = [x.id for x in await repository.get_listings()] + missing_ids = [ + listing_id for listing_id in unique_ids if listing_id not in all_listing_ids + ] + + listing_processor = ListingProcessor(repository) + logger.info(f"Starting processing {len(missing_ids)} new listings") + processed_listings = await tqdm.gather( + *[listing_processor.process_listing(id) for id in missing_ids] + ) + filtered_listings = [x for x in processed_listings if x is not None] + + return filtered_listings diff --git a/crawler/services/query_splitter.py b/crawler/services/query_splitter.py new file mode 100644 index 0000000..0609634 --- /dev/null +++ b/crawler/services/query_splitter.py @@ -0,0 +1,303 @@ +"""Query splitting service for handling Rightmove's result cap. + +This module provides intelligent query splitting to work around Rightmove's +~1,500 listing cap per search. 
It adaptively splits queries by price bands
+based on actual result counts.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+from dataclasses import dataclass, replace
+from typing import Any
+
+import aiohttp
+
+from config.scraper_config import ScraperConfig
+from models.listing import ListingType, QueryParameters
+from rec.districts import get_districts
+from rec.query import probe_query
+
+logger = logging.getLogger("uvicorn.error")
+
+
+@dataclass
+class SubQuery:
+    """Represents a single query subdivision.
+
+    Attributes:
+        district: District identifier string.
+        min_bedrooms: Minimum number of bedrooms.
+        max_bedrooms: Maximum number of bedrooms.
+        min_price: Minimum price in currency units.
+        max_price: Maximum price in currency units.
+        estimated_results: Cached result count from probing (None if not probed).
+    """
+
+    district: str
+    min_bedrooms: int
+    max_bedrooms: int
+    min_price: int
+    max_price: int
+    estimated_results: int | None = None
+
+    @property
+    def price_range(self) -> int:
+        """Returns the width of the price band."""
+        return self.max_price - self.min_price
+
+
+class QuerySplitter:
+    """Splits large queries into smaller subqueries to avoid result caps.
+
+    Uses adaptive bisection of price ranges to find subdivisions
+    that keep each subquery under the result threshold.
+    """
+
+    def __init__(self, config: ScraperConfig | None = None) -> None:
+        """Initialize the splitter with configuration.
+
+        Args:
+            config: Scraper configuration. Loads from environment if not provided.
+        """
+        self.config = config or ScraperConfig.from_env()
+
+    def create_initial_subqueries(
+        self,
+        parameters: QueryParameters,
+        districts: dict[str, str],
+    ) -> list[SubQuery]:
+        """Create initial subqueries by splitting on district and bedrooms.
+
+        This creates the initial split before probing for result counts.
+        Each bedroom count gets its own subquery to enable finer-grained splitting.
+
+        Args:
+            parameters: Original query parameters.
+            districts: Dictionary of district name to location ID.
+
+        Returns:
+            List of initial SubQuery objects.
+        """
+        subqueries: list[SubQuery] = []
+
+        for district in districts.keys():
+            for num_bedrooms in range(
+                parameters.min_bedrooms, parameters.max_bedrooms + 1
+            ):
+                subqueries.append(
+                    SubQuery(
+                        district=district,
+                        min_bedrooms=num_bedrooms,
+                        max_bedrooms=num_bedrooms,
+                        min_price=parameters.min_price,
+                        max_price=parameters.max_price,
+                    )
+                )
+
+        return subqueries
+
+    async def probe_result_count(
+        self,
+        subquery: SubQuery,
+        session: aiohttp.ClientSession,
+        parameters: QueryParameters,
+    ) -> int:
+        """Probe the API to get the total result count for a subquery.
+
+        Makes a minimal request (page_size=1) to get totalAvailableResults.
+
+        Args:
+            subquery: The subquery to probe.
+            session: aiohttp session for making requests.
+            parameters: Original query parameters for additional settings.
+
+        Returns:
+            Total available results for this subquery.
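+
+        Example (illustrative; the count is hypothetical):
+            count = await splitter.probe_result_count(sq, session, parameters)
+            if count > splitter.config.split_threshold:
+                ...  # over the cap-avoidance threshold, split further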
+ """ + from rec.query import probe_query + + try: + result = await probe_query( + session=session, + channel=parameters.listing_type, + min_bedrooms=subquery.min_bedrooms, + max_bedrooms=subquery.max_bedrooms, + radius=parameters.radius, + min_price=subquery.min_price, + max_price=subquery.max_price, + district=subquery.district, + max_days_since_added=parameters.max_days_since_added, + furnish_types=parameters.furnish_types or [], + ) + return result.get("totalAvailableResults", 0) + except Exception as e: + logger.warning(f"Failed to probe subquery {subquery}: {e}") + return 0 + + def split_by_price(self, subquery: SubQuery) -> list[SubQuery]: + """Split a subquery into two by halving the price range. + + Args: + subquery: The subquery to split. + + Returns: + List of two subqueries covering the same price range. + """ + mid_price = (subquery.min_price + subquery.max_price) // 2 + + return [ + replace( + subquery, + max_price=mid_price, + estimated_results=None, + ), + replace( + subquery, + min_price=mid_price, + estimated_results=None, + ), + ] + + async def adaptive_split( + self, + subquery: SubQuery, + session: aiohttp.ClientSession, + parameters: QueryParameters, + semaphore: asyncio.Semaphore, + ) -> list[SubQuery]: + """Recursively split a subquery until all parts are under threshold. + + Uses binary search on price range to find optimal splits. + + Args: + subquery: The subquery to split. + session: aiohttp session for making requests. + parameters: Original query parameters. + semaphore: Semaphore for rate limiting. + + Returns: + List of subqueries that are all under the split threshold. + """ + # Check if we can split further + if subquery.price_range <= self.config.min_price_band: + logger.warning( + f"Cannot split further, price band at minimum: {subquery}" + ) + return [subquery] + + # Split into two halves + halves = self.split_by_price(subquery) + result: list[SubQuery] = [] + + for half in halves: + async with semaphore: + await asyncio.sleep(self.config.request_delay_ms / 1000) + count = await self.probe_result_count(half, session, parameters) + + half = replace(half, estimated_results=count) + + if count > self.config.split_threshold: + # Need to split further + result.extend( + await self.adaptive_split( + half, session, parameters, semaphore + ) + ) + else: + result.append(half) + + return result + + async def split( + self, + parameters: QueryParameters, + session: aiohttp.ClientSession, + on_progress: Any = None, + ) -> list[SubQuery]: + """Split query parameters into optimized subqueries. + + Performs the full splitting algorithm: + 1. Create initial splits by district and bedroom count + 2. Probe each to get result counts + 3. Adaptively split any that exceed the threshold + + Args: + parameters: Original query parameters to split. + session: aiohttp session for making requests. + on_progress: Optional callback for progress updates. + + Returns: + List of SubQuery objects, each under the result threshold. 
+ """ + # Get valid districts + if parameters.district_names: + districts = { + district: locid + for district, locid in get_districts().items() + if district in parameters.district_names + } + else: + districts = get_districts() + + # Phase 1: Create initial subqueries + initial_subqueries = self.create_initial_subqueries(parameters, districts) + logger.info(f"Created {len(initial_subqueries)} initial subqueries") + + if on_progress: + on_progress( + phase="splitting", + message=f"Created {len(initial_subqueries)} initial subqueries", + ) + + # Phase 2: Probe and adaptively split + semaphore = asyncio.Semaphore(self.config.max_concurrent_requests) + refined_subqueries: list[SubQuery] = [] + + # Probe all initial subqueries in parallel + async def probe_and_split(sq: SubQuery) -> list[SubQuery]: + async with semaphore: + await asyncio.sleep(self.config.request_delay_ms / 1000) + count = await self.probe_result_count(sq, session, parameters) + + sq = replace(sq, estimated_results=count) + + if count > self.config.split_threshold: + logger.info( + f"Subquery {sq.district}/{sq.min_bedrooms}BR " + f"has {count} results, splitting..." + ) + return await self.adaptive_split( + sq, session, parameters, semaphore + ) + return [sq] + + tasks = [probe_and_split(sq) for sq in initial_subqueries] + results = await asyncio.gather(*tasks) + + for subquery_list in results: + refined_subqueries.extend(subquery_list) + + logger.info( + f"Refined to {len(refined_subqueries)} subqueries after splitting" + ) + + if on_progress: + on_progress( + phase="splitting_complete", + message=f"Refined to {len(refined_subqueries)} subqueries", + ) + + return refined_subqueries + + def calculate_total_estimated_results( + self, subqueries: list[SubQuery] + ) -> int: + """Calculate total estimated results across all subqueries. + + Args: + subqueries: List of subqueries with estimated_results set. + + Returns: + Sum of all estimated results. 
+ """ + return sum(sq.estimated_results or 0 for sq in subqueries) diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index f86f89e..1fb3041 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -1,18 +1,17 @@ import asyncio -import itertools import logging from typing import Any from celery import Task from celery.schedules import crontab from celery_app import app from config.schedule_config import SchedulesConfig +from config.scraper_config import ScraperConfig from listing_processor import ListingProcessor from models.listing import Listing, QueryParameters -from rec.districts import get_districts -from rec.query import listing_query +from rec.query import create_session, listing_query from repositories.listing_repository import ListingRepository from database import engine -from services import image_fetcher, floorplan_detector +from services.query_splitter import QuerySplitter, SubQuery from utils.redis_lock import redis_lock logger = logging.getLogger("uvicorn.error") @@ -134,106 +133,138 @@ async def get_ids_to_process( repository: ListingRepository, task: Task, ) -> set[int]: - semaphore = asyncio.Semaphore(5) # if too high, rightmove drops connections - districts = await get_valid_districts_to_scrape(parameters.district_names) - task.update_state(state="Fetching listings to scrape", meta={"progress": 0}) - json_responses: list[list[dict[str, Any]]] = await asyncio.gather( - *[ - _fetch_listings_with_semaphore( - task=task, semaphore=semaphore, parameters=parameters, district=district - ) - for district in districts.keys() - ], - ) - json_responses_flat = list(itertools.chain.from_iterable(json_responses)) - logger.debug(f"Total listings fetched {len(json_responses_flat)}") + """Fetch all listing IDs using intelligent query splitting. - identifiers: set[int] = set() - for response_json in json_responses_flat: - if response_json == {}: - continue - if response_json["totalAvailableResults"] == 0: - continue - for property in response_json["properties"]: - identifier = property["identifier"] - identifiers.add(identifier) + Uses the QuerySplitter to adaptively split large queries and maximize + data extraction while respecting Rightmove's result caps. - # if listing is already in db, do not fetch details again - all_listing_ids = {l.id for l in await repository.get_listings()} - new_ids = identifiers - all_listing_ids - return new_ids + Args: + parameters: Query parameters for the search. + repository: Repository for checking existing listings. + task: Celery task for progress updates. + Returns: + Set of new listing IDs that need to be processed. 
+ """ + config = ScraperConfig.from_env() + splitter = QuerySplitter(config) -async def get_valid_districts_to_scrape( - district_names: set[str] | None, -) -> dict[str, str]: - if district_names: - districts = { - district: locid - for district, locid in get_districts().items() - if district in district_names - } - else: - districts = get_districts() - return districts + def on_progress(phase: str, message: str) -> None: + task.update_state(state=message, meta={"phase": phase}) - -async def _fetch_listings_with_semaphore( - *, - task: Task, - semaphore: asyncio.Semaphore, - parameters: QueryParameters, - district: str, -) -> list[dict[str, Any]]: - result = [] - # split the price in N bands to avoid the 1.5k capping by rightmove - # basically instead of 1 query with price between 1k and 5k that is capped at 1500 results - # we do 10 queries each with an increment in price range so we send more queries but each - # has a smaller chance of returning more than 1.5k results - - number_of_steps = 10 - price_step = parameters.max_price // number_of_steps - - for step in range(number_of_steps): + async with create_session(config) as session: + # Phase 1 & 2: Split and probe queries task.update_state( - state=f"Fetching listings ({step} out of {number_of_steps})", - meta={"progress": step / number_of_steps}, + state="Analyzing query and splitting by price bands...", + meta={"phase": "splitting", "progress": 0}, ) - min_price = step * price_step - max_price = (step + 1) * price_step - logger.debug( - f"Step {step} of {number_of_steps} with {min_price=} and {max_price=}" + subqueries = await splitter.split(parameters, session, on_progress) + + total_estimated = splitter.calculate_total_estimated_results(subqueries) + logger.info( + f"Split into {len(subqueries)} subqueries, " + f"estimated {total_estimated} total results" ) - for num_bedrooms in range(parameters.min_bedrooms, parameters.max_bedrooms + 1): - for page_id in range( - 1, - 3, # seems like all searches stop at 1500 entries (page_id * page_size) - ): - logger.debug(f"Processing {page_id=} for {district=}") + # Phase 3: Fetch all pages for each subquery + task.update_state( + state=f"Fetching listings from {len(subqueries)} subqueries...", + meta={ + "phase": "fetching", + "subqueries": len(subqueries), + "estimated_results": total_estimated, + }, + ) + semaphore = asyncio.Semaphore(config.max_concurrent_requests) + identifiers: set[int] = set() + + async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]: + """Fetch all pages for a single subquery.""" + results: list[dict[str, Any]] = [] + + # Calculate how many pages we need based on estimated results + estimated = sq.estimated_results or 0 + if estimated == 0: + return results + + # Fetch pages up to max_pages_per_query or until no more results + page_size = parameters.page_size + max_pages = min( + config.max_pages_per_query, + (estimated // page_size) + 1, + ) + + for page_id in range(1, max_pages + 1): async with semaphore: + await asyncio.sleep(config.request_delay_ms / 1000) try: - listing_query_result = await listing_query( + result = await listing_query( page=page_id, channel=parameters.listing_type, - # min_bedrooms=parameters.min_bedrooms, - # max_bedrooms=parameters.max_bedrooms, - min_bedrooms=num_bedrooms, - max_bedrooms=num_bedrooms, + min_bedrooms=sq.min_bedrooms, + max_bedrooms=sq.max_bedrooms, radius=parameters.radius, - min_price=min_price, - max_price=max_price, - district=district, - page_size=parameters.page_size, + min_price=sq.min_price, + 
max_price=sq.max_price, + district=sq.district, + page_size=page_size, max_days_since_added=parameters.max_days_since_added, furnish_types=parameters.furnish_types or [], + session=session, ) + results.append(result) + + # Check if we've received all results + properties = result.get("properties", []) + if len(properties) < page_size: + # No more results on next page + break except Exception as e: - if "GENERIC_ERROR" in str(e): # Too big page id - logger.debug(f"Max page id for {district=}: {page_id-1}") + if "GENERIC_ERROR" in str(e): + # Reached end of results + logger.debug( + f"Max page for {sq.district}: {page_id - 1}" + ) break - raise e - result.append(listing_query_result) - return result + logger.warning( + f"Error fetching page {page_id} for {sq.district}: {e}" + ) + break + + return results + + # Fetch all subqueries concurrently + all_results = await asyncio.gather( + *[fetch_subquery(sq) for sq in subqueries] + ) + + # Extract identifiers from all results + for subquery_results in all_results: + for response_json in subquery_results: + if not response_json: + continue + if response_json.get("totalAvailableResults", 0) == 0: + continue + for property_data in response_json.get("properties", []): + identifier = property_data.get("identifier") + if identifier: + identifiers.add(identifier) + + logger.info(f"Found {len(identifiers)} unique listings") + + # Filter out listings already in the database + all_listing_ids = {l.id for l in await repository.get_listings()} + new_ids = identifiers - all_listing_ids + + task.update_state( + state=f"Found {len(new_ids)} new listings to process", + meta={ + "phase": "filtering", + "total_found": len(identifiers), + "new_listings": len(new_ids), + }, + ) + + return new_ids diff --git a/crawler/tests/unit/test_query_splitter.py b/crawler/tests/unit/test_query_splitter.py new file mode 100644 index 0000000..467f234 --- /dev/null +++ b/crawler/tests/unit/test_query_splitter.py @@ -0,0 +1,374 @@ +"""Unit tests for QuerySplitter service.""" +import pytest +from unittest.mock import AsyncMock, patch + +from config.scraper_config import ScraperConfig +from models.listing import ListingType, QueryParameters +from services.query_splitter import QuerySplitter, SubQuery + + +class TestScraperConfig: + """Tests for the ScraperConfig dataclass.""" + + def test_default_values(self) -> None: + """Test that default values are set correctly.""" + config = ScraperConfig() + assert config.max_concurrent_requests == 5 + assert config.request_delay_ms == 100 + assert config.result_cap == 1500 + assert config.split_threshold == 1200 + assert config.min_price_band == 100 + assert config.max_pages_per_query == 60 + assert config.proxy_url is None + + def test_from_env(self) -> None: + """Test loading configuration from environment variables.""" + with patch.dict( + "os.environ", + { + "RIGHTMOVE_MAX_CONCURRENT": "10", + "RIGHTMOVE_REQUEST_DELAY_MS": "200", + "RIGHTMOVE_SPLIT_THRESHOLD": "1000", + "RIGHTMOVE_MIN_PRICE_BAND": "50", + "RIGHTMOVE_MAX_PAGES": "30", + "RIGHTMOVE_PROXY_URL": "socks5://localhost:9050", + }, + ): + config = ScraperConfig.from_env() + assert config.max_concurrent_requests == 10 + assert config.request_delay_ms == 200 + assert config.split_threshold == 1000 + assert config.min_price_band == 50 + assert config.max_pages_per_query == 30 + assert config.proxy_url == "socks5://localhost:9050" + + def test_from_env_empty_proxy(self) -> None: + """Test that empty proxy URL is converted to None.""" + with patch.dict( + "os.environ", + { + 
"RIGHTMOVE_PROXY_URL": "", + }, + clear=False, + ): + config = ScraperConfig.from_env() + assert config.proxy_url is None + + +class TestSubQuery: + """Tests for the SubQuery dataclass.""" + + def test_price_range_calculation(self) -> None: + """Test that price_range is calculated correctly.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=2000, + ) + assert sq.price_range == 1000 + + +class TestQuerySplitter: + """Tests for the QuerySplitter class.""" + + @pytest.fixture + def config(self) -> ScraperConfig: + """Create a test configuration.""" + return ScraperConfig( + max_concurrent_requests=5, + request_delay_ms=10, # Faster for testing + result_cap=1500, + split_threshold=1200, + min_price_band=100, + max_pages_per_query=60, + proxy_url=None, + ) + + @pytest.fixture + def splitter(self, config: ScraperConfig) -> QuerySplitter: + """Create a QuerySplitter instance.""" + return QuerySplitter(config) + + @pytest.fixture + def parameters(self) -> QueryParameters: + """Create test query parameters.""" + return QueryParameters( + listing_type=ListingType.RENT, + min_bedrooms=2, + max_bedrooms=3, + min_price=1000, + max_price=5000, + district_names={"Kings Cross", "Angel"}, + ) + + def test_create_initial_subqueries( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test that initial subqueries are created correctly.""" + districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"} + + subqueries = splitter.create_initial_subqueries(parameters, districts) + + # 2 districts × 2 bedroom counts (2,3) = 4 subqueries + assert len(subqueries) == 4 + + # Check first subquery + assert subqueries[0].district == "Kings Cross" + assert subqueries[0].min_bedrooms == 2 + assert subqueries[0].max_bedrooms == 2 + assert subqueries[0].min_price == 1000 + assert subqueries[0].max_price == 5000 + + def test_split_by_price(self, splitter: QuerySplitter) -> None: + """Test that price splitting works correctly.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=5000, + ) + + halves = splitter.split_by_price(sq) + + assert len(halves) == 2 + assert halves[0].min_price == 1000 + assert halves[0].max_price == 3000 # midpoint + assert halves[1].min_price == 3000 + assert halves[1].max_price == 5000 + + # Both should have same bedroom range and district + for half in halves: + assert half.district == "Kings Cross" + assert half.min_bedrooms == 2 + assert half.max_bedrooms == 2 + + @pytest.mark.asyncio + async def test_probe_result_count( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test probing API for result count.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=5000, + ) + + mock_session = AsyncMock() + + # Mock the probe_query function + with patch("services.query_splitter.probe_query") as mock_probe: + mock_probe.return_value = {"totalAvailableResults": 800} + + count = await splitter.probe_result_count(sq, mock_session, parameters) + + assert count == 800 + mock_probe.assert_called_once() + + @pytest.mark.asyncio + async def test_probe_result_count_handles_error( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test that probe_result_count handles errors gracefully.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=5000, + ) + + mock_session = AsyncMock() + + with 
patch("services.query_splitter.probe_query") as mock_probe: + mock_probe.side_effect = Exception("API error") + + count = await splitter.probe_result_count(sq, mock_session, parameters) + + # Should return 0 on error + assert count == 0 + + @pytest.mark.asyncio + async def test_adaptive_split_no_split_needed( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test adaptive split when results are below threshold.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=2000, + ) + + mock_session = AsyncMock() + mock_semaphore = AsyncMock() + + with patch("services.query_splitter.probe_query") as mock_probe: + # First half has 600 results, second half has 500 + mock_probe.side_effect = [ + {"totalAvailableResults": 600}, + {"totalAvailableResults": 500}, + ] + + result = await splitter.adaptive_split( + sq, mock_session, parameters, mock_semaphore + ) + + # Both halves are under threshold (1200), so we get 2 subqueries back + assert len(result) == 2 + assert result[0].estimated_results == 600 + assert result[1].estimated_results == 500 + + @pytest.mark.asyncio + async def test_adaptive_split_recursive_splitting( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test adaptive split performs recursive splitting when needed.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=5000, + ) + + mock_session = AsyncMock() + mock_semaphore = AsyncMock() + + with patch("services.query_splitter.probe_query") as mock_probe: + # First split: 1000-3000 has 1300 (over threshold), 3000-5000 has 800 + # Second split of 1000-3000: 1000-2000 has 700, 2000-3000 has 600 + mock_probe.side_effect = [ + {"totalAvailableResults": 1300}, # First half - needs more splitting + {"totalAvailableResults": 800}, # Second half - OK + {"totalAvailableResults": 700}, # First quarter - OK + {"totalAvailableResults": 600}, # Second quarter - OK + ] + + result = await splitter.adaptive_split( + sq, mock_session, parameters, mock_semaphore + ) + + # Should get 3 subqueries: [1000-2000 (700), 2000-3000 (600), 3000-5000 (800)] + assert len(result) == 3 + + @pytest.mark.asyncio + async def test_adaptive_split_respects_min_price_band( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test that adaptive split stops at min_price_band.""" + sq = SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=1050, # Only 50 range, below min_price_band of 100 + estimated_results=1500, # Over threshold but can't split + ) + + mock_session = AsyncMock() + mock_semaphore = AsyncMock() + + result = await splitter.adaptive_split( + sq, mock_session, parameters, mock_semaphore + ) + + # Can't split below min_price_band, should return original + assert len(result) == 1 + assert result[0].min_price == 1000 + assert result[0].max_price == 1050 + + def test_calculate_total_estimated_results( + self, splitter: QuerySplitter + ) -> None: + """Test calculation of total estimated results.""" + subqueries = [ + SubQuery( + district="Kings Cross", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=2000, + estimated_results=500, + ), + SubQuery( + district="Kings Cross", + min_bedrooms=3, + max_bedrooms=3, + min_price=1000, + max_price=2000, + estimated_results=300, + ), + SubQuery( + district="Angel", + min_bedrooms=2, + max_bedrooms=2, + min_price=1000, + max_price=2000, + estimated_results=None, # Not 
probed + ), + ] + + total = splitter.calculate_total_estimated_results(subqueries) + assert total == 800 # 500 + 300 + 0 + + @pytest.mark.asyncio + async def test_split_integration( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Integration test for the full split workflow.""" + mock_session = AsyncMock() + mock_districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"} + + with patch("services.query_splitter.get_districts", return_value=mock_districts): + with patch("services.query_splitter.probe_query") as mock_probe: + # Mock probe results for each initial subquery + # 2 districts × 2 bedroom counts = 4 initial subqueries + mock_probe.side_effect = [ + {"totalAvailableResults": 500}, # KC 2BR - OK + {"totalAvailableResults": 1300}, # KC 3BR - needs split + {"totalAvailableResults": 600}, # Angel 2BR - OK + {"totalAvailableResults": 800}, # Angel 3BR - OK + # Split KC 3BR + {"totalAvailableResults": 700}, # KC 3BR first half + {"totalAvailableResults": 600}, # KC 3BR second half + ] + + result = await splitter.split(parameters, mock_session) + + # Should have 5 subqueries total: + # KC 2BR (500), KC 3BR split into 2 (700+600), Angel 2BR (600), Angel 3BR (800) + assert len(result) == 5 + + # Verify total estimated results + total = splitter.calculate_total_estimated_results(result) + assert total == 3200 # 500 + 700 + 600 + 600 + 800 + + @pytest.mark.asyncio + async def test_split_with_on_progress_callback( + self, splitter: QuerySplitter, parameters: QueryParameters + ) -> None: + """Test that on_progress callback is called during split.""" + mock_session = AsyncMock() + mock_districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"} + progress_calls = [] + + def on_progress(phase: str, message: str) -> None: + progress_calls.append((phase, message)) + + with patch("services.query_splitter.get_districts", return_value=mock_districts): + with patch("services.query_splitter.probe_query") as mock_probe: + mock_probe.return_value = {"totalAvailableResults": 500} + + await splitter.split(parameters, mock_session, on_progress) + + # Should have received at least 2 progress updates + assert len(progress_calls) >= 2 + phases = [call[0] for call in progress_calls] + assert "splitting" in phases + assert "splitting_complete" in phases
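+
+    def test_split_by_price_resets_estimates(self, splitter: QuerySplitter) -> None:
+        """Extra illustrative check (a sketch, not part of the original suite):
+        splitting must discard the parent's cached count so both halves get
+        re-probed by adaptive_split."""
+        sq = SubQuery(
+            district="Kings Cross",
+            min_bedrooms=2,
+            max_bedrooms=2,
+            min_price=1000,
+            max_price=3000,
+            estimated_results=1400,  # cached parent count, should not survive
+        )
+
+        halves = splitter.split_by_price(sq)
+
+        # split_by_price passes estimated_results=None to dataclasses.replace
+        assert all(half.estimated_results is None for half in halves)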