Compare commits
5 commits
5514fa6381
...
500cb576db
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
500cb576db | ||
|
|
3267adca66 | ||
|
|
33ae5c91a2 | ||
|
|
75a9dbf035 | ||
|
|
e3fb6ab836 |
28 changed files with 4529 additions and 271 deletions
|
|
@ -7,6 +7,21 @@ export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLit
|
||||||
export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks
|
export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks
|
||||||
export CELERY_RESULT_BACKEND="redis://localhost:6379/1"
|
export CELERY_RESULT_BACKEND="redis://localhost:6379/1"
|
||||||
|
|
||||||
|
# Rightmove scraper configuration
|
||||||
|
# These settings control query splitting to work around Rightmove's ~1500 result cap
|
||||||
|
RIGHTMOVE_MAX_CONCURRENT=5 # Max concurrent HTTP requests
|
||||||
|
RIGHTMOVE_REQUEST_DELAY_MS=100 # Delay between requests in milliseconds
|
||||||
|
RIGHTMOVE_SPLIT_THRESHOLD=1200 # Split query when results exceed this threshold
|
||||||
|
RIGHTMOVE_MIN_PRICE_BAND=100 # Minimum price band width (won't split below this)
|
||||||
|
RIGHTMOVE_MAX_PAGES=60 # Max pages per subquery (60 * 25 = 1500 max results)
|
||||||
|
RIGHTMOVE_PROXY_URL= # Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor)
|
||||||
|
|
||||||
|
# Throttling detection and circuit breaker
|
||||||
|
RIGHTMOVE_SLOW_RESPONSE_THRESHOLD=10.0 # Response time threshold in seconds
|
||||||
|
RIGHTMOVE_ENABLE_CIRCUIT_BREAKER=true # Enable circuit breaker protection
|
||||||
|
RIGHTMOVE_CIRCUIT_BREAKER_FAILURES=5 # Consecutive failures to open circuit
|
||||||
|
RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT=60.0 # Seconds to wait before recovery attempt
|
||||||
|
|
||||||
# Periodic scraping schedules (JSON array)
|
# Periodic scraping schedules (JSON array)
|
||||||
# Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types
|
# Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types
|
||||||
# Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday)
|
# Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday)
|
||||||
|
|
|
||||||
233
crawler/CLAUDE.md
Normal file
233
crawler/CLAUDE.md
Normal file
|
|
@ -0,0 +1,233 @@
|
||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
A real estate listing crawler and aggregator that scrapes property listings from Rightmove UK, extracts square meter data from floorplan images using OCR, calculates transit routes, and provides a web UI for browsing listings.
|
||||||
|
|
||||||
|
## Development Environment
|
||||||
|
|
||||||
|
**IMPORTANT**: This project runs on a remote host, not locally. Always use the remote executor to run commands:
|
||||||
|
|
||||||
|
- **All shell commands** (Python, pytest, poetry, alembic, etc.) must be executed via the remote executor
|
||||||
|
- **Starting the project**: Use the remote executor to run `./start.sh`
|
||||||
|
- **Running tests**: Use the remote executor to run `pytest`
|
||||||
|
- **Any CLI operations**: Use the remote executor to run `python main.py ...`
|
||||||
|
|
||||||
|
Never run commands directly on the local machine - always route them through the remote executor.
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
### Setup and Run (Docker - Recommended)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start all services (Redis, MySQL, API, Celery) with Docker
|
||||||
|
./start.sh
|
||||||
|
|
||||||
|
# Rebuild images and start
|
||||||
|
./start.sh --build
|
||||||
|
|
||||||
|
# Stop all containers
|
||||||
|
./start.sh --down
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
./start.sh --logs
|
||||||
|
```
|
||||||
|
|
||||||
|
### Setup and Run (Local with Poetry)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install dependencies
|
||||||
|
poetry install && cp .env.sample .env
|
||||||
|
|
||||||
|
# Start backend locally (requires Redis running)
|
||||||
|
./start.sh --local
|
||||||
|
|
||||||
|
# Start frontend (from frontend/ directory)
|
||||||
|
cd frontend && ./start.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### CLI Operations
|
||||||
|
|
||||||
|
The main CLI (`main.py`) uses Click with a `--data-dir` option (default: `data/rs/`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Dump listings from Rightmove API
|
||||||
|
python main.py dump-listings --type rent --min-price 2000 --max-price 4000 --min-bedrooms 2
|
||||||
|
|
||||||
|
# Download floorplan images
|
||||||
|
python main.py dump-images
|
||||||
|
|
||||||
|
# Extract square meters from floorplans using OCR
|
||||||
|
python main.py detect-floorplan
|
||||||
|
|
||||||
|
# Calculate transit routes (consumes Google Maps API calls)
|
||||||
|
python main.py routing --destination-address 'Address' -m transit -l 10
|
||||||
|
|
||||||
|
# Export to GeoJSON for visualization
|
||||||
|
python main.py export-immoweb -O output.js --type rent [filter options]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run tests with coverage
|
||||||
|
pytest tests/ -v --cov=. --cov-report=term-missing
|
||||||
|
|
||||||
|
# Run type checker
|
||||||
|
mypy .
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Migrations
|
||||||
|
|
||||||
|
```bash
|
||||||
|
alembic upgrade head # Apply migrations
|
||||||
|
alembic revision -m "description" # Create new migration
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code Formatting
|
||||||
|
|
||||||
|
```bash
|
||||||
|
yapf --style .style.yapf --recursive .
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Core Data Flow
|
||||||
|
|
||||||
|
1. **Scraping** (`rec/query.py`): Fetches listing IDs and details from Rightmove's Android API
|
||||||
|
2. **Processing** (`listing_processor.py`): Pipeline with steps for fetching details, downloading images, and OCR detection
|
||||||
|
3. **Storage**: SQLModel/SQLAlchemy with MySQL or SQLite, plus JSON files in `data/rs/<listing_id>/`
|
||||||
|
4. **API** (`api/app.py`): FastAPI endpoints authenticated via JWT from external Authentik service
|
||||||
|
5. **Background Tasks** (`tasks/listing_tasks.py`): Celery tasks for async listing processing with Redis broker
|
||||||
|
|
||||||
|
### Key Models
|
||||||
|
|
||||||
|
- `models/listing.py`: SQLModel entities (`RentListing`, `BuyListing`) with `QueryParameters` for filtering
|
||||||
|
- `data_access.py`: **DEPRECATED** - Legacy `Listing` dataclass for filesystem-based data access. Use `models.listing.RentListing` or `models.listing.BuyListing` instead.
|
||||||
|
|
||||||
|
### Services Layer (Unified CLI and API)
|
||||||
|
|
||||||
|
**IMPORTANT**: The `services/` directory contains unified handler functions that both the CLI and HTTP API use. This ensures consistency and code reuse.
|
||||||
|
|
||||||
|
#### High-level services (use these in CLI and API):
|
||||||
|
- **`listing_service.py`**: Listing operations
|
||||||
|
- `get_listings()` - Retrieve listings from database
|
||||||
|
- `refresh_listings()` - Fetch new listings from Rightmove (sync or async)
|
||||||
|
- `download_images()` - Download floorplan images
|
||||||
|
- `detect_floorplans()` - Run OCR on floorplans
|
||||||
|
- `calculate_routes()` - Calculate transit routes
|
||||||
|
|
||||||
|
- **`export_service.py`**: Export operations
|
||||||
|
- `export_to_csv()` - Export listings to CSV file
|
||||||
|
- `export_to_geojson()` - Export listings to GeoJSON (file or in-memory)
|
||||||
|
|
||||||
|
- **`district_service.py`**: District management
|
||||||
|
- `get_all_districts()` - Get district name → region ID mapping
|
||||||
|
- `get_district_names()` - Get list of district names
|
||||||
|
- `validate_districts()` - Validate district names
|
||||||
|
|
||||||
|
- **`task_service.py`**: Background task management
|
||||||
|
- `get_task_status()` - Get Celery task status
|
||||||
|
- `get_user_tasks()` - Get all tasks for a user
|
||||||
|
- `add_task_for_user()` - Associate task with user
|
||||||
|
|
||||||
|
#### Low-level services (internal implementation):
|
||||||
|
- `listing_fetcher.py`: Fetches listing data from Rightmove API
|
||||||
|
- `image_fetcher.py`: Downloads floorplan images
|
||||||
|
- `floorplan_detector.py`: OCR-based square meter detection
|
||||||
|
- `route_calculator.py`: Calculates transit routes using Google Maps API
|
||||||
|
- `query_splitter.py`: Intelligent query splitting to maximize data extraction
|
||||||
|
|
||||||
|
### Query Splitting System
|
||||||
|
|
||||||
|
Rightmove's API caps search results at ~1,500 listings per query. The query splitting system works around this limitation to fetch **all matching listings**.
|
||||||
|
|
||||||
|
#### How it works:
|
||||||
|
|
||||||
|
1. **Initial Split**: Queries are split by district and bedroom count
|
||||||
|
2. **Probe**: Each subquery is probed (minimal API request) to get `totalAvailableResults`
|
||||||
|
3. **Adaptive Split**: If results exceed threshold (1,200), the price range is binary-split
|
||||||
|
4. **Recursive Refinement**: Splitting continues until all subqueries are under threshold
|
||||||
|
5. **Full Fetch**: Each subquery fetches up to 60 pages (1,500 results max)
|
||||||
|
|
||||||
|
```
|
||||||
|
Original: 2BR, £1000-£5000 → 3,000 results (over cap!)
|
||||||
|
↓ split by price
|
||||||
|
£1000-£3000: 1,800 (still over!) | £3000-£5000: 1,200 ✓
|
||||||
|
↓ split again
|
||||||
|
£1000-£2000: 900 ✓ | £2000-£3000: 900 ✓
|
||||||
|
|
||||||
|
Final: 3 subqueries → 900 + 900 + 1,200 = 3,000 total results ✓
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Key components:
|
||||||
|
- `config/scraper_config.py`: Configuration with env var loading
|
||||||
|
- `services/query_splitter.py`: `QuerySplitter` class with `SubQuery` dataclass
|
||||||
|
- `rec/query.py`: `probe_query()` for result count probing, `create_session()` for connection pooling
|
||||||
|
|
||||||
|
### Processing Pipeline
|
||||||
|
|
||||||
|
`ListingProcessor` runs sequential steps defined in `listing_processor.py`:
|
||||||
|
1. `FetchListingDetailsStep` - Get property details from API
|
||||||
|
2. `FetchImagesStep` - Download floorplan images
|
||||||
|
3. `DetectFloorplanStep` - OCR to extract square meters from floorplans
|
||||||
|
|
||||||
|
### Floorplan OCR
|
||||||
|
|
||||||
|
`rec/floorplan.py` uses pytesseract with image preprocessing (adaptive thresholding) to extract square meter values from floorplan images.
|
||||||
|
|
||||||
|
### Repository Pattern
|
||||||
|
|
||||||
|
`repositories/listing_repository.py` handles database operations with SQLModel sessions.
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
- `DB_CONNECTION_STRING`: Database URL (SQLite default: `sqlite:///data/wrongmove.db`)
|
||||||
|
- `CELERY_BROKER_URL` / `CELERY_RESULT_BACKEND`: Redis URLs
|
||||||
|
- `ROUTING_API_KEY`: Google Maps API key for transit routing
|
||||||
|
|
||||||
|
### Scraper Configuration
|
||||||
|
|
||||||
|
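These control request pacing and query splitting behavior (see `.env.sample` for defaults; it also lists the throttling-detection and circuit-breaker settings, which are not shown in this table):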
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `RIGHTMOVE_MAX_CONCURRENT` | 5 | Max concurrent HTTP requests |
|
||||||
|
| `RIGHTMOVE_REQUEST_DELAY_MS` | 100 | Delay between requests (ms) |
|
||||||
|
| `RIGHTMOVE_SPLIT_THRESHOLD` | 1200 | Split query when results exceed this |
|
||||||
|
| `RIGHTMOVE_MIN_PRICE_BAND` | 100 | Minimum price band width (won't split below) |
|
||||||
|
| `RIGHTMOVE_MAX_PAGES` | 60 | Max pages per subquery (60 × 25 = 1500) |
|
||||||
|
| `RIGHTMOVE_PROXY_URL` | - | SOCKS proxy URL (e.g., `socks5://localhost:9050` for Tor) |
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
- `main.py`: CLI entry point
|
||||||
|
- `api/`: FastAPI application with auth middleware
|
||||||
|
- `config/`: Configuration modules (scraper settings, scheduled tasks)
|
||||||
|
- `models/`: SQLModel database entities
|
||||||
|
- `repositories/`: Database access layer
|
||||||
|
- `rec/`: Core business logic (query, floorplan OCR, routing, districts)
|
||||||
|
- `services/`: Service layer modules — high-level (listing_service, export_service, district_service, task_service) and low-level (listing_fetcher, image_fetcher, floorplan_detector, route_calculator, query_splitter)
|
||||||
|
- `tasks/`: Celery background tasks
|
||||||
|
- `frontend/`: React/Vite frontend with Caddy proxy
|
||||||
|
- `alembic/`: Database migrations
|
||||||
|
- `tests/`: Test suite (unit and integration tests)
|
||||||
|
|
||||||
|
## Type Checking
|
||||||
|
|
||||||
|
The project uses strict mypy configuration with `disallow_untyped_defs=true`. Run `mypy .` to check types.
|
||||||
|
|
||||||
|
## Exploration Preferences
|
||||||
|
|
||||||
|
- Always ignore `node_modules` directory when exploring the codebase
|
||||||
|
|
||||||
|
## Git Workflow
|
||||||
|
|
||||||
|
**IMPORTANT**: After completing work items, always create separate commits for each logical change:
|
||||||
|
- Keep each commit focused on one feature/fix
|
||||||
|
- Do not include unrelated files
|
||||||
|
- Use descriptive commit messages
|
||||||
|
- Group related files together (e.g., tests with the code they test)
|
||||||
|
|
||||||
|
|
@ -18,6 +18,11 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||||
from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson
|
from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson
|
||||||
|
|
||||||
from services import listing_service, export_service, district_service, task_service
|
from services import listing_service, export_service, district_service, task_service
|
||||||
|
from services.listing_cache import (
|
||||||
|
get_cached_count,
|
||||||
|
get_cached_features,
|
||||||
|
cache_features_batch,
|
||||||
|
)
|
||||||
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
||||||
from api.metrics import metrics_app
|
from api.metrics import metrics_app
|
||||||
from opentelemetry.metrics import get_meter
|
from opentelemetry.metrics import get_meter
|
||||||
|
|
@ -101,7 +106,7 @@ async def get_listing(
|
||||||
async def get_listing_geojson(
|
async def get_listing_geojson(
|
||||||
user: Annotated[User, Depends(get_current_user)],
|
user: Annotated[User, Depends(get_current_user)],
|
||||||
query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
|
query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
|
||||||
limit: int = 1000, # Default limit to prevent timeout
|
limit: int | None = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Get listings as GeoJSON for map display."""
|
"""Get listings as GeoJSON for map display."""
|
||||||
repository = ListingRepository(engine)
|
repository = ListingRepository(engine)
|
||||||
|
|
@ -118,7 +123,7 @@ async def stream_listing_geojson(
|
||||||
user: Annotated[User, Depends(get_current_user)],
|
user: Annotated[User, Depends(get_current_user)],
|
||||||
query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
|
query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
|
||||||
batch_size: int = 50,
|
batch_size: int = 50,
|
||||||
limit: int = 1000,
|
limit: int | None = None,
|
||||||
) -> StreamingResponse:
|
) -> StreamingResponse:
|
||||||
"""Stream listings as NDJSON for progressive map loading.
|
"""Stream listings as NDJSON for progressive map loading.
|
||||||
|
|
||||||
|
|
@ -128,6 +133,32 @@ async def stream_listing_geojson(
|
||||||
- complete: Final message with total count
|
- complete: Final message with total count
|
||||||
"""
|
"""
|
||||||
async def generate():
|
async def generate():
|
||||||
|
# Check cache first
|
||||||
|
cached_count = get_cached_count(query_parameters)
|
||||||
|
|
||||||
|
if cached_count is not None and cached_count > 0:
|
||||||
|
# Cache HIT
|
||||||
|
effective_total = min(limit, cached_count) if limit else cached_count
|
||||||
|
|
||||||
|
yield json.dumps({
|
||||||
|
"type": "metadata",
|
||||||
|
"batch_size": batch_size,
|
||||||
|
"total_expected": effective_total,
|
||||||
|
"cached": True,
|
||||||
|
}) + "\n"
|
||||||
|
|
||||||
|
count = 0
|
||||||
|
for feature_batch in get_cached_features(query_parameters, batch_size=batch_size):
|
||||||
|
if limit and count + len(feature_batch) > limit:
|
||||||
|
feature_batch = feature_batch[:limit - count]
|
||||||
|
count += len(feature_batch)
|
||||||
|
yield json.dumps({"type": "batch", "features": feature_batch}) + "\n"
|
||||||
|
if limit and count >= limit:
|
||||||
|
break
|
||||||
|
|
||||||
|
yield json.dumps({"type": "complete", "total": count}) + "\n"
|
||||||
|
else:
|
||||||
|
# Cache MISS - query DB and populate cache
|
||||||
repository = ListingRepository(engine)
|
repository = ListingRepository(engine)
|
||||||
|
|
||||||
# Phase 1: Fast count for progress estimation
|
# Phase 1: Fast count for progress estimation
|
||||||
|
|
@ -138,6 +169,7 @@ async def stream_listing_geojson(
|
||||||
"type": "metadata",
|
"type": "metadata",
|
||||||
"batch_size": batch_size,
|
"batch_size": batch_size,
|
||||||
"total_expected": effective_total,
|
"total_expected": effective_total,
|
||||||
|
"cached": False,
|
||||||
}) + "\n"
|
}) + "\n"
|
||||||
|
|
||||||
# Phase 2: Stream with column projection and keyset pagination
|
# Phase 2: Stream with column projection and keyset pagination
|
||||||
|
|
@ -151,11 +183,13 @@ async def stream_listing_geojson(
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
if len(batch) >= batch_size:
|
if len(batch) >= batch_size:
|
||||||
|
cache_features_batch(query_parameters, batch)
|
||||||
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
||||||
batch = []
|
batch = []
|
||||||
|
|
||||||
# Send remaining
|
# Send remaining
|
||||||
if batch:
|
if batch:
|
||||||
|
cache_features_batch(query_parameters, batch)
|
||||||
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
||||||
|
|
||||||
# Final message
|
# Final message
|
||||||
|
|
@ -200,13 +234,19 @@ async def refresh_listings(
|
||||||
async def get_task_status(
|
async def get_task_status(
|
||||||
user: Annotated[User, Depends(get_current_user)],
|
user: Annotated[User, Depends(get_current_user)],
|
||||||
task_id: str,
|
task_id: str,
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str | int | float | None]:
|
||||||
"""Get the status of a background task."""
|
"""Get the status of a background task."""
|
||||||
status = task_service.get_task_status(task_id)
|
status = task_service.get_task_status(task_id)
|
||||||
return {
|
return {
|
||||||
"task_id": status.task_id,
|
"task_id": status.task_id,
|
||||||
"status": status.status,
|
"status": status.status,
|
||||||
"result": json.dumps(status.result) if status.result else "",
|
"result": json.dumps(status.result) if status.result else None,
|
||||||
|
"progress": status.progress,
|
||||||
|
"processed": status.processed,
|
||||||
|
"total": status.total,
|
||||||
|
"message": status.message,
|
||||||
|
"error": status.error,
|
||||||
|
"traceback": status.traceback,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
"""Configuration modules."""
|
"""Configuration modules."""
|
||||||
from config.schedule_config import ScheduleConfig, SchedulesConfig
|
from config.schedule_config import ScheduleConfig, SchedulesConfig
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
|
|
||||||
__all__ = ["ScheduleConfig", "SchedulesConfig"]
|
__all__ = ["ScheduleConfig", "SchedulesConfig", "ScraperConfig"]
|
||||||
|
|
|
||||||
89
crawler/config/scraper_config.py
Normal file
89
crawler/config/scraper_config.py
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
"""Scraper configuration with environment variable loading."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class ScraperConfig:
|
||||||
|
"""Configuration for the Rightmove scraper.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
max_concurrent_requests: Maximum number of concurrent HTTP requests.
|
||||||
|
request_delay_ms: Delay between requests in milliseconds.
|
||||||
|
result_cap: Maximum results Rightmove returns per query (their limit).
|
||||||
|
split_threshold: When results exceed this, split the query further.
|
||||||
|
min_price_band: Minimum width of a price band (won't split below this).
|
||||||
|
max_pages_per_query: Maximum pages to fetch per subquery (60 * 25 = 1500).
|
||||||
|
proxy_url: Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor).
|
||||||
|
slow_response_threshold: Response time threshold in seconds for throttle detection.
|
||||||
|
enable_circuit_breaker: Whether to enable circuit breaker protection.
|
||||||
|
circuit_breaker_failure_threshold: Number of consecutive failures to open circuit.
|
||||||
|
circuit_breaker_recovery_timeout: Seconds to wait before testing recovery.
|
||||||
|
"""
|
||||||
|
|
||||||
|
max_concurrent_requests: int = 5
|
||||||
|
request_delay_ms: int = 100
|
||||||
|
result_cap: int = 1500
|
||||||
|
split_threshold: int = 1200 # Split when approaching cap
|
||||||
|
min_price_band: int = 100 # Minimum band width in currency units
|
||||||
|
max_pages_per_query: int = 60 # 60 * 25 = 1500 results max
|
||||||
|
proxy_url: str | None = None
|
||||||
|
slow_response_threshold: float = 10.0 # seconds
|
||||||
|
enable_circuit_breaker: bool = True
|
||||||
|
circuit_breaker_failure_threshold: int = 5
|
||||||
|
circuit_breaker_recovery_timeout: float = 60.0
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_env(cls) -> Self:
|
||||||
|
"""Load configuration from environment variables.
|
||||||
|
|
||||||
|
Environment variables:
|
||||||
|
RIGHTMOVE_MAX_CONCURRENT: Max concurrent requests (default: 5)
|
||||||
|
RIGHTMOVE_REQUEST_DELAY_MS: Request delay in ms (default: 100)
|
||||||
|
RIGHTMOVE_RESULT_CAP: Result cap per query (default: 1500)
|
||||||
|
RIGHTMOVE_SPLIT_THRESHOLD: Split threshold (default: 1200)
|
||||||
|
RIGHTMOVE_MIN_PRICE_BAND: Minimum price band width (default: 100)
|
||||||
|
RIGHTMOVE_MAX_PAGES: Max pages per query (default: 60)
|
||||||
|
RIGHTMOVE_PROXY_URL: SOCKS proxy URL (default: None)
|
||||||
|
RIGHTMOVE_SLOW_RESPONSE_THRESHOLD: Slow response threshold in seconds (default: 10.0)
|
||||||
|
RIGHTMOVE_ENABLE_CIRCUIT_BREAKER: Enable circuit breaker (default: True)
|
||||||
|
RIGHTMOVE_CIRCUIT_BREAKER_FAILURES: Failures to open circuit (default: 5)
|
||||||
|
RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT: Recovery timeout in seconds (default: 60.0)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ScraperConfig instance with values from environment or defaults.
|
||||||
|
"""
|
||||||
|
return cls(
|
||||||
|
max_concurrent_requests=int(
|
||||||
|
os.environ.get("RIGHTMOVE_MAX_CONCURRENT", "5")
|
||||||
|
),
|
||||||
|
request_delay_ms=int(
|
||||||
|
os.environ.get("RIGHTMOVE_REQUEST_DELAY_MS", "100")
|
||||||
|
),
|
||||||
|
result_cap=int(os.environ.get("RIGHTMOVE_RESULT_CAP", "1500")),
|
||||||
|
split_threshold=int(
|
||||||
|
os.environ.get("RIGHTMOVE_SPLIT_THRESHOLD", "1200")
|
||||||
|
),
|
||||||
|
min_price_band=int(
|
||||||
|
os.environ.get("RIGHTMOVE_MIN_PRICE_BAND", "100")
|
||||||
|
),
|
||||||
|
max_pages_per_query=int(
|
||||||
|
os.environ.get("RIGHTMOVE_MAX_PAGES", "60")
|
||||||
|
),
|
||||||
|
proxy_url=os.environ.get("RIGHTMOVE_PROXY_URL") or None,
|
||||||
|
slow_response_threshold=float(
|
||||||
|
os.environ.get("RIGHTMOVE_SLOW_RESPONSE_THRESHOLD", "10.0")
|
||||||
|
),
|
||||||
|
enable_circuit_breaker=os.environ.get(
|
||||||
|
"RIGHTMOVE_ENABLE_CIRCUIT_BREAKER", "true"
|
||||||
|
).lower() in ("true", "1", "yes"),
|
||||||
|
circuit_breaker_failure_threshold=int(
|
||||||
|
os.environ.get("RIGHTMOVE_CIRCUIT_BREAKER_FAILURES", "5")
|
||||||
|
),
|
||||||
|
circuit_breaker_recovery_timeout=float(
|
||||||
|
os.environ.get("RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT", "60.0")
|
||||||
|
),
|
||||||
|
)
|
||||||
252
crawler/frontend/package-lock.json
generated
252
crawler/frontend/package-lock.json
generated
|
|
@ -9,7 +9,9 @@
|
||||||
"version": "0.0.0",
|
"version": "0.0.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@hookform/resolvers": "^5.1.1",
|
"@hookform/resolvers": "^5.1.1",
|
||||||
|
"@radix-ui/react-accordion": "^1.2.12",
|
||||||
"@radix-ui/react-alert-dialog": "^1.1.14",
|
"@radix-ui/react-alert-dialog": "^1.1.14",
|
||||||
|
"@radix-ui/react-checkbox": "^1.3.3",
|
||||||
"@radix-ui/react-dialog": "^1.1.14",
|
"@radix-ui/react-dialog": "^1.1.14",
|
||||||
"@radix-ui/react-hover-card": "^1.1.14",
|
"@radix-ui/react-hover-card": "^1.1.14",
|
||||||
"@radix-ui/react-label": "^2.1.7",
|
"@radix-ui/react-label": "^2.1.7",
|
||||||
|
|
@ -18,6 +20,7 @@
|
||||||
"@radix-ui/react-scroll-area": "^1.2.9",
|
"@radix-ui/react-scroll-area": "^1.2.9",
|
||||||
"@radix-ui/react-select": "^2.2.5",
|
"@radix-ui/react-select": "^2.2.5",
|
||||||
"@radix-ui/react-separator": "^1.1.7",
|
"@radix-ui/react-separator": "^1.1.7",
|
||||||
|
"@radix-ui/react-slider": "^1.3.6",
|
||||||
"@radix-ui/react-slot": "^1.2.3",
|
"@radix-ui/react-slot": "^1.2.3",
|
||||||
"@radix-ui/react-tooltip": "^1.2.7",
|
"@radix-ui/react-tooltip": "^1.2.7",
|
||||||
"@tabler/icons-react": "^3.34.0",
|
"@tabler/icons-react": "^3.34.0",
|
||||||
|
|
@ -40,7 +43,7 @@
|
||||||
"react-dom": "^19.1.0",
|
"react-dom": "^19.1.0",
|
||||||
"react-hook-form": "^7.58.1",
|
"react-hook-form": "^7.58.1",
|
||||||
"react-oidc-context": "^3.3.0",
|
"react-oidc-context": "^3.3.0",
|
||||||
"rivets": "^0.9.6",
|
"react-virtuoso": "^4.18.1",
|
||||||
"tailwind-merge": "^3.3.1",
|
"tailwind-merge": "^3.3.1",
|
||||||
"tailwindcss": "^4.1.10",
|
"tailwindcss": "^4.1.10",
|
||||||
"zod": "^3.25.67"
|
"zod": "^3.25.67"
|
||||||
|
|
@ -50,7 +53,6 @@
|
||||||
"@types/node": "^24.0.1",
|
"@types/node": "^24.0.1",
|
||||||
"@types/react": "^19.1.2",
|
"@types/react": "^19.1.2",
|
||||||
"@types/react-dom": "^19.1.2",
|
"@types/react-dom": "^19.1.2",
|
||||||
"@types/rivets": "^0.9.5",
|
|
||||||
"@vitejs/plugin-react-swc": "^3.9.0",
|
"@vitejs/plugin-react-swc": "^3.9.0",
|
||||||
"eslint": "^9.25.0",
|
"eslint": "^9.25.0",
|
||||||
"eslint-plugin-react-hooks": "^5.2.0",
|
"eslint-plugin-react-hooks": "^5.2.0",
|
||||||
|
|
@ -924,6 +926,43 @@
|
||||||
"integrity": "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA==",
|
"integrity": "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-accordion": {
|
||||||
|
"version": "1.2.12",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz",
|
||||||
|
"integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.3",
|
||||||
|
"@radix-ui/react-collapsible": "1.1.12",
|
||||||
|
"@radix-ui/react-collection": "1.1.7",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-direction": "1.1.1",
|
||||||
|
"@radix-ui/react-id": "1.1.1",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.2.2"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/primitive": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-alert-dialog": {
|
"node_modules/@radix-ui/react-alert-dialog": {
|
||||||
"version": "1.1.14",
|
"version": "1.1.14",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.14.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.14.tgz",
|
||||||
|
|
@ -975,6 +1014,126 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-checkbox": {
|
||||||
|
"version": "1.3.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-checkbox/-/react-checkbox-1.3.3.tgz",
|
||||||
|
"integrity": "sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.3",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-presence": "1.1.5",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.2.2",
|
||||||
|
"@radix-ui/react-use-previous": "1.1.1",
|
||||||
|
"@radix-ui/react-use-size": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/primitive": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/react-presence": {
|
||||||
|
"version": "1.1.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
|
||||||
|
"integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-use-layout-effect": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-collapsible": {
|
||||||
|
"version": "1.1.12",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz",
|
||||||
|
"integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.3",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-id": "1.1.1",
|
||||||
|
"@radix-ui/react-presence": "1.1.5",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.2.2",
|
||||||
|
"@radix-ui/react-use-layout-effect": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/primitive": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-presence": {
|
||||||
|
"version": "1.1.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
|
||||||
|
"integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-use-layout-effect": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-collection": {
|
"node_modules/@radix-ui/react-collection": {
|
||||||
"version": "1.1.7",
|
"version": "1.1.7",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
|
||||||
|
|
@ -1482,6 +1641,45 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-slider": {
|
||||||
|
"version": "1.3.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-slider/-/react-slider-1.3.6.tgz",
|
||||||
|
"integrity": "sha512-JPYb1GuM1bxfjMRlNLE+BcmBC8onfCi60Blk7OBqi2MLTFdS+8401U4uFjnwkOr49BLmXxLC6JHkvAsx5OJvHw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/number": "1.1.1",
|
||||||
|
"@radix-ui/primitive": "1.1.3",
|
||||||
|
"@radix-ui/react-collection": "1.1.7",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-direction": "1.1.1",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.2.2",
|
||||||
|
"@radix-ui/react-use-layout-effect": "1.1.1",
|
||||||
|
"@radix-ui/react-use-previous": "1.1.1",
|
||||||
|
"@radix-ui/react-use-size": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-slider/node_modules/@radix-ui/primitive": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-slot": {
|
"node_modules/@radix-ui/react-slot": {
|
||||||
"version": "1.2.3",
|
"version": "1.2.3",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
|
||||||
|
|
@ -2773,16 +2971,6 @@
|
||||||
"@types/geojson": "*"
|
"@types/geojson": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/jquery": {
|
|
||||||
"version": "3.5.32",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/jquery/-/jquery-3.5.32.tgz",
|
|
||||||
"integrity": "sha512-b9Xbf4CkMqS02YH8zACqN1xzdxc3cO735Qe5AbSUFmyOiaWAbcpqh9Wna+Uk0vgACvoQHpWDg2rGdHkYPLmCiQ==",
|
|
||||||
"dev": true,
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/sizzle": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@types/json-schema": {
|
"node_modules/@types/json-schema": {
|
||||||
"version": "7.0.15",
|
"version": "7.0.15",
|
||||||
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
|
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
|
||||||
|
|
@ -2852,23 +3040,6 @@
|
||||||
"@types/react": "^19.0.0"
|
"@types/react": "^19.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/rivets": {
|
|
||||||
"version": "0.9.5",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/rivets/-/rivets-0.9.5.tgz",
|
|
||||||
"integrity": "sha512-spCtZoSOrS8kNTJNOXamCCQurqOdF1Piak8bUQVqHQNRoTLoID6O6xVX41P5W2vvlxc9UpSG75zl4CRra0l3Eg==",
|
|
||||||
"dev": true,
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/jquery": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@types/sizzle": {
|
|
||||||
"version": "2.3.9",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/sizzle/-/sizzle-2.3.9.tgz",
|
|
||||||
"integrity": "sha512-xzLEyKB50yqCUPUJkIsrVvoWNfFUbIZI+RspLWt8u+tIW/BetMBZtgV2LY/2o+tYH8dRvQ+eoPf3NdhQCcLE2w==",
|
|
||||||
"dev": true,
|
|
||||||
"license": "MIT"
|
|
||||||
},
|
|
||||||
"node_modules/@types/supercluster": {
|
"node_modules/@types/supercluster": {
|
||||||
"version": "7.1.3",
|
"version": "7.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
|
||||||
|
|
@ -5348,6 +5519,16 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/react-virtuoso": {
|
||||||
|
"version": "4.18.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/react-virtuoso/-/react-virtuoso-4.18.1.tgz",
|
||||||
|
"integrity": "sha512-KF474cDwaSb9+SJ380xruBB4P+yGWcVkcu26HtMqYNMTYlYbrNy8vqMkE+GpAApPPufJqgOLMoWMFG/3pJMXUA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": ">=16 || >=17 || >= 18 || >= 19",
|
||||||
|
"react-dom": ">=16 || >=17 || >= 18 || >=19"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/resolve-from": {
|
"node_modules/resolve-from": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
|
||||||
|
|
@ -5378,14 +5559,6 @@
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/rivets": {
|
|
||||||
"version": "0.9.6",
|
|
||||||
"resolved": "https://registry.npmjs.org/rivets/-/rivets-0.9.6.tgz",
|
|
||||||
"integrity": "sha512-KfdMjLRWw4+38ej9bRXegKZVfYo0jEacwadA5z6NTKya+YohwGemwdbxvJ52WCXODkTnR4Q8UmUC6HVxsdzkxA==",
|
|
||||||
"dependencies": {
|
|
||||||
"sightglass": "~0.2.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/robust-predicates": {
|
"node_modules/robust-predicates": {
|
||||||
"version": "3.0.2",
|
"version": "3.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz",
|
||||||
|
|
@ -5524,11 +5697,6 @@
|
||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/sightglass": {
|
|
||||||
"version": "0.2.6",
|
|
||||||
"resolved": "https://registry.npmjs.org/sightglass/-/sightglass-0.2.6.tgz",
|
|
||||||
"integrity": "sha512-t1fgbuhURcWc8VgZk8kJQ3QmmZk3kghDcf0wpsN8I8RaV05IUkc2b195KpGqgocKT/q8+vKk6EcB2c7N2lAd6A=="
|
|
||||||
},
|
|
||||||
"node_modules/source-map-js": {
|
"node_modules/source-map-js": {
|
||||||
"version": "1.2.1",
|
"version": "1.2.1",
|
||||||
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
|
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,9 @@
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@hookform/resolvers": "^5.1.1",
|
"@hookform/resolvers": "^5.1.1",
|
||||||
|
"@radix-ui/react-accordion": "^1.2.12",
|
||||||
"@radix-ui/react-alert-dialog": "^1.1.14",
|
"@radix-ui/react-alert-dialog": "^1.1.14",
|
||||||
|
"@radix-ui/react-checkbox": "^1.3.3",
|
||||||
"@radix-ui/react-dialog": "^1.1.14",
|
"@radix-ui/react-dialog": "^1.1.14",
|
||||||
"@radix-ui/react-hover-card": "^1.1.14",
|
"@radix-ui/react-hover-card": "^1.1.14",
|
||||||
"@radix-ui/react-label": "^2.1.7",
|
"@radix-ui/react-label": "^2.1.7",
|
||||||
|
|
@ -20,6 +22,7 @@
|
||||||
"@radix-ui/react-scroll-area": "^1.2.9",
|
"@radix-ui/react-scroll-area": "^1.2.9",
|
||||||
"@radix-ui/react-select": "^2.2.5",
|
"@radix-ui/react-select": "^2.2.5",
|
||||||
"@radix-ui/react-separator": "^1.1.7",
|
"@radix-ui/react-separator": "^1.1.7",
|
||||||
|
"@radix-ui/react-slider": "^1.3.6",
|
||||||
"@radix-ui/react-slot": "^1.2.3",
|
"@radix-ui/react-slot": "^1.2.3",
|
||||||
"@radix-ui/react-tooltip": "^1.2.7",
|
"@radix-ui/react-tooltip": "^1.2.7",
|
||||||
"@tabler/icons-react": "^3.34.0",
|
"@tabler/icons-react": "^3.34.0",
|
||||||
|
|
@ -42,7 +45,7 @@
|
||||||
"react-dom": "^19.1.0",
|
"react-dom": "^19.1.0",
|
||||||
"react-hook-form": "^7.58.1",
|
"react-hook-form": "^7.58.1",
|
||||||
"react-oidc-context": "^3.3.0",
|
"react-oidc-context": "^3.3.0",
|
||||||
"rivets": "^0.9.6",
|
"react-virtuoso": "^4.18.1",
|
||||||
"tailwind-merge": "^3.3.1",
|
"tailwind-merge": "^3.3.1",
|
||||||
"tailwindcss": "^4.1.10",
|
"tailwindcss": "^4.1.10",
|
||||||
"zod": "^3.25.67"
|
"zod": "^3.25.67"
|
||||||
|
|
@ -52,7 +55,6 @@
|
||||||
"@types/node": "^24.0.1",
|
"@types/node": "^24.0.1",
|
||||||
"@types/react": "^19.1.2",
|
"@types/react": "^19.1.2",
|
||||||
"@types/react-dom": "^19.1.2",
|
"@types/react-dom": "^19.1.2",
|
||||||
"@types/rivets": "^0.9.5",
|
|
||||||
"@vitejs/plugin-react-swc": "^3.9.0",
|
"@vitejs/plugin-react-swc": "^3.9.0",
|
||||||
"eslint": "^9.25.0",
|
"eslint": "^9.25.0",
|
||||||
"eslint-plugin-react-hooks": "^5.2.0",
|
"eslint-plugin-react-hooks": "^5.2.0",
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,10 @@
|
||||||
import type { User } from 'oidc-client-ts';
|
import type { User } from 'oidc-client-ts';
|
||||||
import { useEffect, useState, useRef, useCallback } from 'react';
|
import { useEffect, useState, useRef, useCallback } from 'react';
|
||||||
import './App.css';
|
import './App.css';
|
||||||
import { getUser, handleCallback } from './auth/authService';
|
import { getUser } from './auth/authService';
|
||||||
import AlertError from './components/AlertError';
|
import AlertError from './components/AlertError';
|
||||||
import LoginModal from './components/LoginModal';
|
import LoginModal from './components/LoginModal';
|
||||||
|
import AuthCallback from './components/AuthCallback';
|
||||||
import { Map } from './components/Map';
|
import { Map } from './components/Map';
|
||||||
import { FilterPanel, type ParameterValues, DEFAULT_FILTER_VALUES } from './components/FilterPanel';
|
import { FilterPanel, type ParameterValues, DEFAULT_FILTER_VALUES } from './components/FilterPanel';
|
||||||
import { Header } from './components/Header';
|
import { Header } from './components/Header';
|
||||||
|
|
@ -34,15 +35,12 @@ function App() {
|
||||||
// Ref to track if initial load has been triggered
|
// Ref to track if initial load has been triggered
|
||||||
const initialLoadTriggeredRef = useRef(false);
|
const initialLoadTriggeredRef = useRef(false);
|
||||||
|
|
||||||
useEffect(() => {
|
// Check if this is the callback route - render dedicated component
|
||||||
// Check if this is a callback from Authentik (after login)
|
|
||||||
if (window.location.pathname === '/callback') {
|
if (window.location.pathname === '/callback') {
|
||||||
handleCallback().then(() => {
|
return <AuthCallback />;
|
||||||
window.location.href = '/'; // Redirect to home after login
|
|
||||||
});
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
// Load user data
|
// Load user data
|
||||||
getUser().then(setUser);
|
getUser().then(setUser);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
@ -69,16 +67,32 @@ function App() {
|
||||||
setStreamingProgress({ count: 0 });
|
setStreamingProgress({ count: 0 });
|
||||||
setListingData(null);
|
setListingData(null);
|
||||||
|
|
||||||
|
let updateScheduled = false;
|
||||||
|
|
||||||
|
const flushUpdate = () => {
|
||||||
|
updateScheduled = false;
|
||||||
|
setListingData({
|
||||||
|
type: 'FeatureCollection',
|
||||||
|
features: [...accumulatedFeaturesRef.current]
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const scheduleUpdate = () => {
|
||||||
|
if (!updateScheduled) {
|
||||||
|
updateScheduled = true;
|
||||||
|
requestAnimationFrame(flushUpdate);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for await (const batch of streamListingGeoJSON(user, parameters, (progress) => {
|
for await (const batch of streamListingGeoJSON(user, parameters, (progress) => {
|
||||||
setStreamingProgress(progress);
|
setStreamingProgress(progress);
|
||||||
})) {
|
})) {
|
||||||
accumulatedFeaturesRef.current.push(...batch);
|
accumulatedFeaturesRef.current.push(...batch);
|
||||||
setListingData({
|
scheduleUpdate();
|
||||||
type: 'FeatureCollection',
|
|
||||||
features: [...accumulatedFeaturesRef.current]
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
// Final flush to ensure all data is rendered
|
||||||
|
flushUpdate();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error instanceof Error) {
|
if (error instanceof Error) {
|
||||||
setSubmitError(error.message);
|
setSubmitError(error.message);
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,36 @@
|
||||||
import { User, UserManager } from 'oidc-client-ts';
|
import { User, UserManager } from 'oidc-client-ts';
|
||||||
import { oidcConfig } from './config';
|
import { oidcConfig } from './config';
|
||||||
|
import { parseOidcError, type AuthError } from './errors';
|
||||||
|
|
||||||
const userManager = new UserManager(oidcConfig);
|
const userManager = new UserManager(oidcConfig);
|
||||||
|
|
||||||
export const login = () => userManager.signinRedirect();
|
export const login = async (): Promise<void> => {
|
||||||
export const logout = () => userManager.signoutRedirect();
|
try {
|
||||||
export const handleCallback = () => userManager.signinRedirectCallback();
|
await userManager.signinRedirect();
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Login redirect failed:', error);
|
||||||
|
throw parseOidcError(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
export const logout = async (): Promise<void> => {
|
||||||
|
try {
|
||||||
|
await userManager.signoutRedirect();
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Logout redirect failed:', error);
|
||||||
|
throw parseOidcError(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
export const handleCallback = async (): Promise<User> => {
|
||||||
|
try {
|
||||||
|
const user = await userManager.signinRedirectCallback();
|
||||||
|
return user;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Callback handling failed:', error);
|
||||||
|
throw parseOidcError(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
export const getUser = async (): Promise<User | null> => {
|
export const getUser = async (): Promise<User | null> => {
|
||||||
try {
|
try {
|
||||||
|
|
@ -16,3 +41,5 @@ export const getUser = async (): Promise<User | null> => {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type { AuthError };
|
||||||
|
|
|
||||||
60
crawler/frontend/src/auth/errors.ts
Normal file
60
crawler/frontend/src/auth/errors.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
/** Categories of authentication failure that the UI distinguishes between. */
export enum AuthErrorType {
  REDIRECT_FAILED = 'REDIRECT_FAILED',
  CALLBACK_FAILED = 'CALLBACK_FAILED',
  NETWORK_ERROR = 'NETWORK_ERROR',
  USER_CANCELLED = 'USER_CANCELLED',
}

/** Normalized, user-presentable description of an authentication failure. */
export interface AuthError {
  /** Failure category. */
  type: AuthErrorType;
  /** Text that is shown to the user. */
  message: string;
  /** Whether offering a "try again" action makes sense. */
  retryable: boolean;
}

/** One classification rule: any keyword found in the message selects it. */
interface AuthErrorRule {
  type: AuthErrorType;
  message: string;
  keywords: readonly string[];
}

/**
 * Classification rules, evaluated top to bottom; the first match wins.
 * Keywords are matched as lower-case substrings of the error message.
 */
const AUTH_ERROR_RULES: readonly AuthErrorRule[] = [
  {
    // Popup/redirect blocked
    type: AuthErrorType.REDIRECT_FAILED,
    message: 'Unable to redirect. Please check if popups are blocked.',
    keywords: ['popup', 'blocked', 'window'],
  },
  {
    // User cancellation
    type: AuthErrorType.USER_CANCELLED,
    message: 'Sign in was cancelled.',
    keywords: ['cancel', 'closed', 'denied'],
  },
  {
    // Network errors ('fetch' also covers "failed to fetch")
    type: AuthErrorType.NETWORK_ERROR,
    message: 'Unable to reach authentication server. Please check your connection.',
    keywords: ['network', 'fetch', 'timeout'],
  },
  {
    // Callback/state errors ('state' also covers "no matching state")
    type: AuthErrorType.CALLBACK_FAILED,
    message: 'Login verification failed. Please try again.',
    keywords: ['state', 'invalid', 'mismatch'],
  },
];

const DEFAULT_AUTH_ERROR_MESSAGE = 'An unexpected error occurred during sign in.';

/** True when `value` already has the shape of an {@link AuthError}. */
function isAuthError(value: unknown): value is AuthError {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.message === 'string' &&
    typeof candidate.retryable === 'boolean' &&
    Object.values(AuthErrorType).includes(candidate.type as AuthErrorType)
  );
}

/** Best-effort extraction of a human-readable message from a thrown value. */
function extractErrorMessage(error: unknown): string {
  // String(null) / String(undefined) would leak "null" / "undefined" to the user.
  if (error === null || error === undefined) {
    return '';
  }
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object') {
    // Plain objects with a message field would otherwise become "[object Object]".
    const message = (error as { message?: unknown }).message;
    if (typeof message === 'string') {
      return message;
    }
  }
  return String(error);
}

/**
 * Convert an arbitrary thrown value into an {@link AuthError}.
 *
 * The message text is lower-cased and matched against the keyword rules in
 * order; the first rule containing a matching keyword determines the result.
 * When nothing matches, the result is a CALLBACK_FAILED error carrying the
 * original message (or a generic fallback when there is no message).
 *
 * @param error - Value caught from an authentication call; may be anything.
 * @returns A normalized error. A value that already is an AuthError is
 *          returned unchanged, so parsing twice is harmless.
 */
export function parseOidcError(error: unknown): AuthError {
  if (isAuthError(error)) {
    return error;
  }

  const errorMessage = extractErrorMessage(error);
  const errorString = errorMessage.toLowerCase();

  const matchedRule = AUTH_ERROR_RULES.find((rule) =>
    rule.keywords.some((keyword) => errorString.includes(keyword)),
  );
  if (matchedRule) {
    return {
      type: matchedRule.type,
      message: matchedRule.message,
      retryable: true,
    };
  }

  // Default error
  return {
    type: AuthErrorType.CALLBACK_FAILED,
    message: errorMessage || DEFAULT_AUTH_ERROR_MESSAGE,
    retryable: true,
  };
}
|
||||||
111
crawler/frontend/src/components/AuthCallback.tsx
Normal file
111
crawler/frontend/src/components/AuthCallback.tsx
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
import React, { useEffect, useState } from 'react';
|
||||||
|
import { handleCallback, login, type AuthError } from '@/auth/authService';
|
||||||
|
import { Loader2, CheckCircle, AlertCircle, Home } from 'lucide-react';
|
||||||
|
import { Button } from './ui/button';
|
||||||
|
|
||||||
|
type CallbackState = 'processing' | 'success' | 'error';

/** How long the success screen stays visible before navigating home (ms). */
const REDIRECT_DELAY_MS = 1500;

/**
 * Full-page screen that completes the sign-in redirect.
 *
 * On mount it calls `handleCallback()` once and shows one of three states:
 * a spinner while processing, a confirmation followed by a redirect to `/`
 * on success, or an error message with "Try Again" / "Go Home" actions.
 */
const AuthCallback: React.FC = () => {
  const [state, setState] = useState<CallbackState>('processing');
  const [error, setError] = useState<AuthError | null>(null);

  // Guards against processing the same callback URL twice. Mount effects can
  // run more than once (e.g. React StrictMode in development), and a second
  // sign-in callback for the same response is expected to fail because the
  // stored sign-in state was already consumed — NOTE(review): confirm against
  // the oidc-client-ts version in use.
  const callbackStartedRef = React.useRef(false);
  // Pending redirect timer, kept in a ref so that whichever effect cleanup
  // runs at unmount can cancel it.
  const redirectTimerRef = React.useRef<number | undefined>(undefined);

  useEffect(() => {
    if (!callbackStartedRef.current) {
      callbackStartedRef.current = true;

      const processCallback = async () => {
        try {
          await handleCallback();
          setState('success');
          // Auto-redirect after success
          redirectTimerRef.current = window.setTimeout(() => {
            window.location.href = '/';
          }, REDIRECT_DELAY_MS);
        } catch (err) {
          setError(err as AuthError);
          setState('error');
        }
      };

      void processCallback();
    }

    return () => {
      // Reading .current at cleanup time is intentional: the timer is created
      // asynchronously, after this effect body has already returned.
      window.clearTimeout(redirectTimerRef.current);
    };
  }, []);

  /** Restart the sign-in flow from scratch after a failed callback. */
  const handleRetry = async () => {
    setState('processing');
    setError(null);
    try {
      await login();
    } catch (err) {
      setError(err as AuthError);
      setState('error');
    }
  };

  /** Abandon the sign-in attempt and load the application root. */
  const handleGoHome = () => {
    window.location.href = '/';
  };

  return (
    <div className="min-h-screen flex items-center justify-center bg-background p-4">
      <div className="w-full max-w-md">
        <div className="bg-card border rounded-xl shadow-lg p-8">
          {state === 'processing' && (
            <div className="text-center space-y-4">
              <div className="flex justify-center">
                <div className="p-4 bg-primary/10 rounded-full">
                  <Loader2 className="h-8 w-8 text-primary animate-spin" />
                </div>
              </div>
              <div className="space-y-2">
                <h1 className="text-xl font-semibold">Completing Sign In</h1>
                <p className="text-muted-foreground">
                  Please wait while we verify your credentials...
                </p>
              </div>
            </div>
          )}

          {state === 'success' && (
            <div className="text-center space-y-4">
              <div className="flex justify-center">
                <div className="p-4 bg-green-500/10 rounded-full">
                  <CheckCircle className="h-8 w-8 text-green-500" />
                </div>
              </div>
              <div className="space-y-2">
                <h1 className="text-xl font-semibold">Welcome Back!</h1>
                <p className="text-muted-foreground">
                  Redirecting you to the dashboard...
                </p>
              </div>
            </div>
          )}

          {state === 'error' && (
            <div className="text-center space-y-6">
              <div className="flex justify-center">
                <div className="p-4 bg-destructive/10 rounded-full">
                  <AlertCircle className="h-8 w-8 text-destructive" />
                </div>
              </div>
              <div className="space-y-2">
                <h1 className="text-xl font-semibold">Sign In Failed</h1>
                <p className="text-muted-foreground">
                  {error?.message || 'An unexpected error occurred.'}
                </p>
              </div>
              <div className="flex flex-col sm:flex-row gap-3 justify-center">
                <Button onClick={handleRetry} className="gap-2">
                  Try Again
                </Button>
                <Button variant="outline" onClick={handleGoHome} className="gap-2">
                  <Home className="h-4 w-4" />
                  Go Home
                </Button>
              </div>
            </div>
          )}
        </div>
      </div>
    </div>
  );
};

export default AuthCallback;
|
||||||
151
crawler/frontend/src/components/ListView.tsx
Normal file
151
crawler/frontend/src/components/ListView.tsx
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
import { useState, useMemo, useCallback } from 'react';
|
||||||
|
import { ArrowUpDown, ArrowUp, ArrowDown } from 'lucide-react';
|
||||||
|
import { Virtuoso } from 'react-virtuoso';
|
||||||
|
import { Button } from './ui/button';
|
||||||
|
import { PropertyCard } from './PropertyCard';
|
||||||
|
import type { GeoJSONFeatureCollection, PropertyFeature, PropertyProperties } from '@/types';
|
||||||
|
|
||||||
|
/** Property fields the list can be ordered by. */
type SortField = 'total_price' | 'qmprice' | 'qm' | 'rooms' | 'last_seen';
type SortOrder = 'asc' | 'desc';

interface ListViewProps {
  /** Listings to display. */
  listingData: GeoJSONFeatureCollection;
  /** Called with the clicked listing's properties and coordinates. */
  onPropertyClick?: (property: PropertyProperties, coordinates: [number, number]) => void;
  /** URL of the listing to render as highlighted, if any. */
  highlightedPropertyUrl?: string | null;
}

interface SortConfig {
  field: SortField;
  order: SortOrder;
}

/** Sort buttons, in display order. */
const SORT_OPTIONS: { field: SortField; label: string }[] = [
  { field: 'total_price', label: 'Price' },
  { field: 'qmprice', label: '£/m²' },
  { field: 'qm', label: 'Size' },
  { field: 'rooms', label: 'Beds' },
  { field: 'last_seen', label: 'Last Seen' },
];

/**
 * Numeric sort key of a listing for `field`, or `null` when the listing has
 * no usable value (missing, non-numeric, non-finite, or an unparseable date).
 */
function getSortValue(properties: PropertyProperties, field: SortField): number | null {
  if (field === 'last_seen') {
    const timestamp = new Date(properties.last_seen).getTime();
    return Number.isNaN(timestamp) ? null : timestamp;
  }

  const raw: unknown = properties[field];
  if (typeof raw !== 'number' || !Number.isFinite(raw)) {
    return null;
  }
  // Same validity rule the average below applies: a non-positive £/m² is not
  // a real price and must not win an ascending sort.
  if (field === 'qmprice' && raw <= 0) {
    return null;
  }
  return raw;
}

/**
 * Comparator for two listings under `sortConfig`. Listings without a usable
 * value always sort after those with one, in both directions, so unknowns
 * never occupy the top of the list.
 */
function compareFeatures(a: PropertyFeature, b: PropertyFeature, sortConfig: SortConfig): number {
  const aValue = getSortValue(a.properties, sortConfig.field);
  const bValue = getSortValue(b.properties, sortConfig.field);

  if (aValue === null || bValue === null) {
    if (aValue === bValue) {
      return 0; // both unknown
    }
    return aValue === null ? 1 : -1;
  }
  return sortConfig.order === 'asc' ? aValue - bValue : bValue - aValue;
}

/**
 * Direction indicator for a sort button. Declared at module level so its
 * component identity is stable across ListView renders.
 */
function SortIcon({ field, sortConfig }: { field: SortField; sortConfig: SortConfig }) {
  if (sortConfig.field !== field) {
    return <ArrowUpDown className="h-3.5 w-3.5" />;
  }
  return sortConfig.order === 'asc'
    ? <ArrowUp className="h-3.5 w-3.5" />
    : <ArrowDown className="h-3.5 w-3.5" />;
}

/**
 * Sortable, virtualized list of property cards.
 *
 * Starts sorted by price per square metre, ascending. Clicking the active
 * sort button again flips the direction; clicking another button switches
 * to that field in ascending order.
 */
export function ListView({ listingData, onPropertyClick, highlightedPropertyUrl }: ListViewProps) {
  const [sortConfig, setSortConfig] = useState<SortConfig>({ field: 'qmprice', order: 'asc' });

  // Calculate average price per sqm for "good deal" indicator
  const avgPricePerSqm = useMemo(() => {
    const validPrices = listingData.features
      .map((f) => f.properties.qmprice)
      .filter((p): p is number => typeof p === 'number' && p > 0);
    return validPrices.length > 0
      ? validPrices.reduce((a, b) => a + b, 0) / validPrices.length
      : 0;
  }, [listingData]);

  // Sort a copy so the incoming feature array is never mutated
  const sortedFeatures = useMemo(
    () => [...listingData.features].sort((a, b) => compareFeatures(a, b, sortConfig)),
    [listingData.features, sortConfig],
  );

  const handleSort = (field: SortField) => {
    setSortConfig((prev) => ({
      field,
      order: prev.field === field && prev.order === 'asc' ? 'desc' : 'asc',
    }));
  };

  const handlePropertyClick = useCallback((feature: PropertyFeature) => {
    if (onPropertyClick) {
      onPropertyClick(feature.properties, feature.geometry.coordinates);
    }
  }, [onPropertyClick]);

  return (
    <div className="h-full flex flex-col bg-background">
      {/* Sort controls */}
      <div className="flex items-center gap-1 p-2 border-b overflow-x-auto">
        <span className="text-xs text-muted-foreground mr-1 shrink-0">Sort:</span>
        {SORT_OPTIONS.map((option) => (
          <Button
            key={option.field}
            variant={sortConfig.field === option.field ? 'secondary' : 'ghost'}
            size="sm"
            className="h-7 px-2 text-xs shrink-0"
            onClick={() => handleSort(option.field)}
          >
            {option.label}
            <SortIcon field={option.field} sortConfig={sortConfig} />
          </Button>
        ))}
      </div>

      {/* Listing count */}
      <div className="px-3 py-2 text-sm text-muted-foreground border-b">
        Showing {sortedFeatures.length.toLocaleString()} properties
      </div>

      {/* Property list */}
      <Virtuoso
        className="flex-1"
        data={sortedFeatures}
        overscan={200}
        itemContent={(_index, feature) => (
          <div className="px-3 pb-2 first:pt-3">
            <PropertyCard
              key={feature.properties.url}
              property={feature.properties}
              variant="compact"
              avgPricePerSqm={avgPricePerSqm}
              isHighlighted={feature.properties.url === highlightedPropertyUrl}
              onClick={() => handlePropertyClick(feature)}
            />
          </div>
        )}
      />
    </div>
  );
}
|
||||||
|
|
@ -1,43 +1,127 @@
|
||||||
import { login } from '@/auth/authService';
|
import { login, type AuthError } from '@/auth/authService';
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { DialogDescription } from '@radix-ui/react-dialog';
|
import { DialogDescription } from '@radix-ui/react-dialog';
|
||||||
import React, { useState } from 'react';
|
import React, { useState } from 'react';
|
||||||
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from './ui/dialog';
|
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from './ui/dialog';
|
||||||
|
import { Home, LogIn, AlertCircle, Loader2 } from 'lucide-react';
|
||||||
|
|
||||||
interface ModalProps {
|
interface LoginModalProps {
|
||||||
isOpen: boolean;
|
isOpen: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Modal: React.FC<ModalProps> = ({
|
const LoginModal: React.FC<LoginModalProps> = ({ isOpen }) => {
|
||||||
isOpen,
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
}) => {
|
const [error, setError] = useState<AuthError | null>(null);
|
||||||
|
|
||||||
if (!isOpen) return null;
|
if (!isOpen) return null;
|
||||||
const [isLoading, setIsLoading] = useState(false)
|
|
||||||
|
const handleLogin = async () => {
|
||||||
|
setIsLoading(true);
|
||||||
|
setError(null);
|
||||||
|
try {
|
||||||
|
await login();
|
||||||
|
} catch (err) {
|
||||||
|
setError(err as AuthError);
|
||||||
|
setIsLoading(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleRetry = () => {
|
||||||
|
setError(null);
|
||||||
|
handleLogin();
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleCancel = () => {
|
||||||
|
setError(null);
|
||||||
|
setIsLoading(false);
|
||||||
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Dialog open={isOpen}>
|
<Dialog open={isOpen}>
|
||||||
<form>
|
|
||||||
<DialogContent className="sm:max-w-[425px]">
|
<DialogContent className="sm:max-w-[425px]">
|
||||||
<DialogHeader>
|
<DialogHeader className="space-y-4">
|
||||||
<DialogTitle>Login to Wrongmove</DialogTitle>
|
<div className="flex items-center gap-3">
|
||||||
<DialogDescription>(We are currently in closed beta; ask Viktor to send you an invitation)</DialogDescription>
|
<div className="p-2 bg-primary/10 rounded-lg">
|
||||||
|
<Home className="h-6 w-6 text-primary" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<DialogTitle className="text-xl">Wrongmove</DialogTitle>
|
||||||
|
<DialogDescription className="text-sm text-muted-foreground">
|
||||||
|
Your smart property search companion
|
||||||
|
</DialogDescription>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</DialogHeader>
|
</DialogHeader>
|
||||||
|
|
||||||
|
<div className="py-4 space-y-4">
|
||||||
|
{/* Beta Notice */}
|
||||||
|
<div className="bg-muted/50 border rounded-lg p-4 text-sm">
|
||||||
|
<p className="text-muted-foreground">
|
||||||
|
We are currently in closed beta. Please contact Viktor to request an invitation.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Error State */}
|
||||||
|
{error && (
|
||||||
|
<div className="bg-destructive/10 border border-destructive/30 rounded-lg p-4 flex items-start gap-3">
|
||||||
|
<AlertCircle className="h-5 w-5 text-destructive shrink-0 mt-0.5" />
|
||||||
|
<div className="flex-1 space-y-2">
|
||||||
|
<p className="text-sm text-destructive">{error.message}</p>
|
||||||
|
<div className="flex gap-2">
|
||||||
|
<Button
|
||||||
|
size="sm"
|
||||||
|
variant="outline"
|
||||||
|
onClick={handleRetry}
|
||||||
|
className="text-destructive border-destructive/30 hover:bg-destructive/10"
|
||||||
|
>
|
||||||
|
Try Again
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
size="sm"
|
||||||
|
variant="ghost"
|
||||||
|
onClick={handleCancel}
|
||||||
|
>
|
||||||
|
Cancel
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Loading State */}
|
||||||
|
{isLoading && !error && (
|
||||||
|
<div className="flex items-center justify-center gap-3 py-4 text-muted-foreground">
|
||||||
|
<Loader2 className="h-5 w-5 animate-spin" />
|
||||||
|
<span>Redirecting to login...</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
<DialogFooter>
|
<DialogFooter>
|
||||||
{isLoading && (
|
{!error && (
|
||||||
<div>Signing in. Please wait...</div>
|
<Button
|
||||||
)
|
onClick={handleLogin}
|
||||||
}
|
disabled={isLoading}
|
||||||
<Button onClick={
|
className="w-full gap-2"
|
||||||
() => {
|
size="lg"
|
||||||
setIsLoading(true)
|
>
|
||||||
login()
|
{isLoading ? (
|
||||||
}} disabled={isLoading}>Login</Button>
|
<>
|
||||||
|
<Loader2 className="h-4 w-4 animate-spin" />
|
||||||
|
Signing in...
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<LogIn className="h-4 w-4" />
|
||||||
|
Sign in with SSO
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</Button>
|
||||||
|
)}
|
||||||
</DialogFooter>
|
</DialogFooter>
|
||||||
</DialogContent>
|
</DialogContent>
|
||||||
</form>
|
|
||||||
</Dialog>
|
</Dialog>
|
||||||
)
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export default Modal;
|
export default LoginModal;
|
||||||
|
|
|
||||||
137
crawler/frontend/src/services/streamingService.ts
Normal file
137
crawler/frontend/src/services/streamingService.ts
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
// Streaming service for progressive listing data loading
|
||||||
|
|
||||||
|
import type { User } from 'oidc-client-ts';
|
||||||
|
import type { PropertyFeature } from '@/types';
|
||||||
|
import type { ParameterValues } from '@/components/FilterPanel';
|
||||||
|
import { ApiError } from '@/types';
|
||||||
|
import { API_ENDPOINTS } from '@/constants';
|
||||||
|
|
||||||
|
/**
 * Serialise a flat parameter map into a URL query string.
 *
 * Entries whose value is undefined, null or the empty string are left out.
 * Date values are written in ISO-8601 form; everything else is stringified.
 * The returned string has no leading "?".
 */
function buildQueryString(params: Record<string, string | number | boolean | Date | undefined>): string {
  const search = new URLSearchParams();

  Object.entries(params).forEach(([name, raw]) => {
    // Parameters that carry no value are omitted entirely.
    if (raw === undefined || raw === null || raw === '') {
      return;
    }

    const encoded = raw instanceof Date ? raw.toISOString() : String(raw);
    search.append(name, encoded);
  });

  return search.toString();
}
|
||||||
|
|
||||||
|
/**
 * Map the filter-panel form values onto the query parameters sent to the
 * listing endpoints.
 *
 * Most fields are forwarded under the same name. `available_from` is sent as
 * `let_date_available_from`, `district` as `district_names`, and
 * `furnish_types` is collapsed into a comma-separated string. Empty district
 * or furnish selections become undefined.
 */
function buildListingParams(parameters: ParameterValues): Record<string, string | number | boolean | Date | undefined> {
  const {
    listing_type,
    min_bedrooms,
    max_bedrooms,
    max_price,
    min_price,
    min_sqm,
    max_sqm,
    min_price_per_sqm,
    max_price_per_sqm,
    last_seen_days,
    available_from,
    district,
    furnish_types,
  } = parameters;

  // Falsy results (empty string, missing list) are normalised to undefined.
  const districtNames = district || undefined;
  const furnishTypes = furnish_types?.join(',') || undefined;

  return {
    listing_type,
    min_bedrooms,
    max_bedrooms,
    max_price,
    min_price,
    min_sqm,
    max_sqm,
    min_price_per_sqm,
    max_price_per_sqm,
    last_seen_days,
    let_date_available_from: available_from,
    district_names: districtNames,
    furnish_types: furnishTypes,
  };
}
|
||||||
|
|
||||||
|
/**
 * One newline-delimited JSON message read from the listing stream.
 *
 * `type` selects how streamListingGeoJSON handles the message; the other
 * fields are optional and only some are read for a given type.
 */
export interface StreamMessage {
  /** Discriminator for the message kind. */
  type: 'metadata' | 'batch' | 'complete';
  /** Features carried by a 'batch' message; yielded to the consumer. */
  features?: PropertyFeature[];
  /** Read from 'complete' messages and reported as the final count/total. */
  total?: number;
  /** Read from 'metadata' messages and reported as the expected total. */
  total_expected?: number;
  /** Not read in this file; presumably the server's batch size (confirm against the API). */
  batch_size?: number;
  /** Not read in this file; presumably whether the response came from a cache (confirm against the API). */
  cached?: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Progress snapshot passed to the optional onProgress callback of
 * streamListingGeoJSON.
 */
export interface StreamingProgress {
  /** Number of features reported so far. */
  count: number;
  /** Total reported by the server, when the message that triggered the update carried one. */
  total?: number;
}
|
||||||
|
|
||||||
|
/**
 * Stream listing GeoJSON data as an async generator.
 *
 * The server response is read as newline-delimited JSON. Each 'batch'
 * message's features are yielded as soon as the line is complete; 'metadata'
 * and 'complete' messages only drive the progress callback.
 *
 * @param user        Authenticated user; `access_token` is sent as a Bearer token.
 * @param parameters  Filter values turned into the request's query string.
 * @param onProgress  Optional callback invoked on every metadata, batch and
 *                    complete message. Errors it throws are logged and the
 *                    affected message is skipped (unchanged from before).
 * @yields Arrays of features, one per 'batch' message, in arrival order.
 * @throws ApiError when the response status is not OK.
 * @throws Error when the response has no body.
 */
export async function* streamListingGeoJSON(
  user: User,
  parameters: ParameterValues,
  onProgress?: (progress: StreamingProgress) => void
): AsyncGenerator<PropertyFeature[], void, unknown> {
  const params = buildListingParams(parameters);
  const queryString = buildQueryString(params);
  const url = queryString
    ? `${API_ENDPOINTS.LISTING_GEOJSON_STREAM}?${queryString}`
    : API_ENDPOINTS.LISTING_GEOJSON_STREAM;

  const response = await fetch(url, {
    headers: {
      Authorization: `Bearer ${user.access_token}`,
    },
  });

  if (!response.ok) {
    throw new ApiError(`Error: ${response.status}`, response.status);
  }

  if (!response.body) {
    throw new Error('No response body');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let totalCount = 0;
  let streamEnded = false;

  // Parse one line and apply its progress side effects. Returns the features
  // to yield for a 'batch' message, otherwise undefined. Kept separate from
  // the generator body so that `yield` is never inside the parse try/catch:
  // an error thrown into the generator must not be reported as a parse error.
  const processLine = (line: string, failureLabel: string): PropertyFeature[] | undefined => {
    try {
      const message: StreamMessage = JSON.parse(line);

      if (message.type === 'metadata') {
        onProgress?.({ count: 0, total: message.total_expected });
      } else if (message.type === 'batch' && message.features) {
        totalCount += message.features.length;
        onProgress?.({ count: totalCount });
        return message.features;
      } else if (message.type === 'complete') {
        onProgress?.({ count: message.total ?? totalCount, total: message.total });
      }
    } catch (e) {
      // A single bad line should not abort the rest of the stream.
      console.error(failureLabel, e);
    }
    return undefined;
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        streamEnded = true;
        break;
      }

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() || ''; // Keep incomplete line in buffer

      for (const line of lines) {
        if (!line.trim()) continue;

        const features = processLine(line, 'Failed to parse streaming message:');
        if (features) {
          yield features;
        }
      }
    }

    // Flush bytes the decoder is still holding from a split multi-byte character.
    buffer += decoder.decode();

    // The last message may arrive without a trailing newline. Handle it like
    // any other line so counts, progress and 'complete' are not lost.
    if (buffer.trim()) {
      const features = processLine(buffer, 'Failed to parse final streaming message:');
      if (features) {
        yield features;
      }
    }
  } finally {
    // Reached on normal completion, on an error, and when the consumer stops
    // iterating early (e.g. `break` in a for-await loop).
    if (!streamEnded) {
      // Best effort: we are already unwinding, so a failing cancel must not
      // mask the error (or early return) that brought us here.
      await reader.cancel().catch(() => undefined);
    }
    reader.releaseLock();
  }
}
|
||||||
|
|
@ -13,6 +13,9 @@ from repositories.listing_repository import ListingRepository
|
||||||
|
|
||||||
logger = logging.getLogger("uvicorn.error")
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
# Also use celery task logger for visibility in worker output
|
||||||
|
celery_logger = logging.getLogger("celery.task")
|
||||||
|
|
||||||
|
|
||||||
class ListingProcessor:
|
class ListingProcessor:
|
||||||
semaphore: asyncio.Semaphore
|
semaphore: asyncio.Semaphore
|
||||||
|
|
@ -36,15 +39,16 @@ class ListingProcessor:
|
||||||
for step in self.process_steps:
|
for step in self.process_steps:
|
||||||
if await step.needs_processing(listing_id):
|
if await step.needs_processing(listing_id):
|
||||||
async with self.semaphore:
|
async with self.semaphore:
|
||||||
|
step_name = step.__class__.__name__
|
||||||
try:
|
try:
|
||||||
listing = await step.process(listing_id)
|
listing = await step.process(listing_id)
|
||||||
|
logger.debug(f"[{listing_id}] {step_name} completed")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to process {listing_id=}: {e}")
|
logger.error(f"[{listing_id}] {step_name} failed: {e}")
|
||||||
|
celery_logger.error(f"[{listing_id}] {step_name} failed: {e}")
|
||||||
return None
|
return None
|
||||||
return listing
|
return listing
|
||||||
|
|
||||||
async def listing_exists(self, listing_id: int) -> bool: ...
|
|
||||||
|
|
||||||
|
|
||||||
class Step:
|
class Step:
|
||||||
listing_repository: ListingRepository
|
listing_repository: ListingRepository
|
||||||
|
|
@ -65,19 +69,23 @@ class FetchListingDetailsStep(Step):
|
||||||
existing_listings = await self.listing_repository.get_listings(
|
existing_listings = await self.listing_repository.get_listings(
|
||||||
only_ids=[listing_id]
|
only_ids=[listing_id]
|
||||||
)
|
)
|
||||||
if (existing_listings) == 0:
|
if len(existing_listings) == 0:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def process(self, listing_id: int) -> Listing:
|
async def process(self, listing_id: int) -> Listing:
|
||||||
logger.debug(f"Fetching details for {listing_id=}")
|
logger.debug(f"[{listing_id}] Fetching property details from API")
|
||||||
|
celery_logger.info(f"[{listing_id}] Fetching details...")
|
||||||
|
|
||||||
existing_listings = await self.listing_repository.get_listings(
|
existing_listings = await self.listing_repository.get_listings(
|
||||||
only_ids=[listing_id]
|
only_ids=[listing_id]
|
||||||
)
|
)
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
if len(existing_listings) > 0:
|
if len(existing_listings) > 0:
|
||||||
# listing exists, do not refresh
|
# listing exists, do not refresh
|
||||||
|
logger.debug(f"[{listing_id}] Already exists, skipping refresh")
|
||||||
return existing_listings[0]
|
return existing_listings[0]
|
||||||
|
|
||||||
listing_details = await detail_query(listing_id)
|
listing_details = await detail_query(listing_id)
|
||||||
|
|
||||||
furnish_type_str = listing_details["property"].get("letFurnishType", "unknown")
|
furnish_type_str = listing_details["property"].get("letFurnishType", "unknown")
|
||||||
|
|
@ -124,7 +132,12 @@ class FetchListingDetailsStep(Step):
|
||||||
additional_info=listing_details,
|
additional_info=listing_details,
|
||||||
)
|
)
|
||||||
await self.listing_repository.upsert_listings([listing])
|
await self.listing_repository.upsert_listings([listing])
|
||||||
logger.debug(f"Completed fetching details for {listing_id=}")
|
|
||||||
|
celery_logger.info(
|
||||||
|
f"[{listing_id}] Details fetched: £{listing.price}, "
|
||||||
|
f"{listing.number_of_bedrooms}BR, {listing.agency}"
|
||||||
|
)
|
||||||
|
logger.debug(f"[{listing_id}] Details fetch complete")
|
||||||
# TODO: dump to filesystem
|
# TODO: dump to filesystem
|
||||||
return listing
|
return listing
|
||||||
|
|
||||||
|
|
@ -140,7 +153,8 @@ class FetchImagesStep(Step):
|
||||||
return len(listing.floorplan_image_paths) == 0
|
return len(listing.floorplan_image_paths) == 0
|
||||||
|
|
||||||
async def process(self, listing_id: int) -> Listing:
|
async def process(self, listing_id: int) -> Listing:
|
||||||
logger.debug(f"Fetching images for {listing_id=}")
|
logger.debug(f"[{listing_id}] Fetching floorplan images")
|
||||||
|
|
||||||
existing_listings = await self.listing_repository.get_listings(
|
existing_listings = await self.listing_repository.get_listings(
|
||||||
only_ids=[listing_id]
|
only_ids=[listing_id]
|
||||||
)
|
)
|
||||||
|
|
@ -152,6 +166,12 @@ class FetchImagesStep(Step):
|
||||||
all_floorplans = listing.additional_info.get("property", {}).get(
|
all_floorplans = listing.additional_info.get("property", {}).get(
|
||||||
"floorplans", []
|
"floorplans", []
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if len(all_floorplans) == 0:
|
||||||
|
logger.debug(f"[{listing_id}] No floorplans available")
|
||||||
|
return listing
|
||||||
|
|
||||||
|
downloaded = 0
|
||||||
client_timeout = aiohttp.ClientTimeout(total=30)
|
client_timeout = aiohttp.ClientTimeout(total=30)
|
||||||
for floorplan_obj in all_floorplans:
|
for floorplan_obj in all_floorplans:
|
||||||
url = floorplan_obj["url"]
|
url = floorplan_obj["url"]
|
||||||
|
|
@ -169,8 +189,12 @@ class FetchImagesStep(Step):
|
||||||
with open(floorplan_path, "wb") as f:
|
with open(floorplan_path, "wb") as f:
|
||||||
f.write(await response.read())
|
f.write(await response.read())
|
||||||
listing.floorplan_image_paths.append(str(floorplan_path))
|
listing.floorplan_image_paths.append(str(floorplan_path))
|
||||||
|
downloaded += 1
|
||||||
|
|
||||||
await self.listing_repository.upsert_listings([listing])
|
await self.listing_repository.upsert_listings([listing])
|
||||||
logger.debug(f"Completed fetching images for {listing_id=}")
|
|
||||||
|
celery_logger.info(f"[{listing_id}] Downloaded {downloaded} floorplan images")
|
||||||
|
logger.debug(f"[{listing_id}] Image fetch complete")
|
||||||
return listing
|
return listing
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -188,11 +212,19 @@ class DetectFloorplanStep(Step):
|
||||||
return listings[0].square_meters is None
|
return listings[0].square_meters is None
|
||||||
|
|
||||||
async def process(self, listing_id: int) -> Listing:
|
async def process(self, listing_id: int) -> Listing:
|
||||||
logger.debug(f"Running floorplan detection for {listing_id=}")
|
logger.debug(f"[{listing_id}] Running OCR on floorplans")
|
||||||
|
|
||||||
listings = await self.listing_repository.get_listings(only_ids=[listing_id])
|
listings = await self.listing_repository.get_listings(only_ids=[listing_id])
|
||||||
if len(listings) == 0:
|
if len(listings) == 0:
|
||||||
raise ValueError(f"Listing {listing_id} does not exist")
|
raise ValueError(f"Listing {listing_id} does not exist")
|
||||||
listing = listings[0]
|
listing = listings[0]
|
||||||
|
|
||||||
|
if len(listing.floorplan_image_paths) == 0:
|
||||||
|
logger.debug(f"[{listing_id}] No floorplan images to process")
|
||||||
|
listing.square_meters = 0
|
||||||
|
await self.listing_repository.upsert_listings([listing])
|
||||||
|
return listing
|
||||||
|
|
||||||
sqms = []
|
sqms = []
|
||||||
for floorplan_path in listing.floorplan_image_paths:
|
for floorplan_path in listing.floorplan_image_paths:
|
||||||
async with self.ocr_semaphore:
|
async with self.ocr_semaphore:
|
||||||
|
|
@ -201,9 +233,15 @@ class DetectFloorplanStep(Step):
|
||||||
)
|
)
|
||||||
if estimated_sqm is not None:
|
if estimated_sqm is not None:
|
||||||
sqms.append(estimated_sqm)
|
sqms.append(estimated_sqm)
|
||||||
|
|
||||||
max_sqm = max(sqms, default=0) # try once, if we fail, keep as 0
|
max_sqm = max(sqms, default=0) # try once, if we fail, keep as 0
|
||||||
# if max_sqm is not None:
|
|
||||||
listing.square_meters = max_sqm
|
listing.square_meters = max_sqm
|
||||||
await self.listing_repository.upsert_listings([listing])
|
await self.listing_repository.upsert_listings([listing])
|
||||||
logger.debug(f"Completed running floorplan detection for {listing_id=}")
|
|
||||||
|
if max_sqm > 0:
|
||||||
|
celery_logger.info(f"[{listing_id}] OCR detected {max_sqm} sqm")
|
||||||
|
else:
|
||||||
|
logger.debug(f"[{listing_id}] OCR: no square meters detected")
|
||||||
|
|
||||||
|
logger.debug(f"[{listing_id}] OCR complete")
|
||||||
return listing
|
return listing
|
||||||
|
|
|
||||||
526
crawler/poetry.lock
generated
526
crawler/poetry.lock
generated
|
|
@ -6,7 +6,7 @@ version = "2.6.1"
|
||||||
description = "Happy Eyeballs for asyncio"
|
description = "Happy Eyeballs for asyncio"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
|
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
|
||||||
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
|
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
|
||||||
|
|
@ -18,7 +18,7 @@ version = "3.12.13"
|
||||||
description = "Async http client/server framework (asyncio)"
|
description = "Async http client/server framework (asyncio)"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5421af8f22a98f640261ee48aae3a37f0c41371e99412d55eaf2f8a46d5dad29"},
|
{file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5421af8f22a98f640261ee48aae3a37f0c41371e99412d55eaf2f8a46d5dad29"},
|
||||||
{file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fcda86f6cb318ba36ed8f1396a6a4a3fd8f856f84d426584392083d10da4de0"},
|
{file = "aiohttp-3.12.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fcda86f6cb318ba36ed8f1396a6a4a3fd8f856f84d426584392083d10da4de0"},
|
||||||
|
|
@ -120,13 +120,29 @@ yarl = ">=1.17.0,<2.0"
|
||||||
[package.extras]
|
[package.extras]
|
||||||
speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "brotlicffi ; platform_python_implementation != \"CPython\""]
|
speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "brotlicffi ; platform_python_implementation != \"CPython\""]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aioresponses"
|
||||||
|
version = "0.7.8"
|
||||||
|
description = "Mock out requests made by ClientSession from aiohttp package"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "aioresponses-0.7.8-py2.py3-none-any.whl", hash = "sha256:b73bd4400d978855e55004b23a3a84cb0f018183bcf066a85ad392800b5b9a94"},
|
||||||
|
{file = "aioresponses-0.7.8.tar.gz", hash = "sha256:b861cdfe5dc58f3b8afac7b0a6973d5d7b2cb608dd0f6253d16b8ee8eaf6df11"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
aiohttp = ">=3.3.0,<4.0.0"
|
||||||
|
packaging = ">=22.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aiosignal"
|
name = "aiosignal"
|
||||||
version = "1.3.2"
|
version = "1.3.2"
|
||||||
description = "aiosignal: a list of registered asynchronous callbacks"
|
description = "aiosignal: a list of registered asynchronous callbacks"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"},
|
{file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"},
|
||||||
{file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"},
|
{file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"},
|
||||||
|
|
@ -400,7 +416,7 @@ version = "5.0.1"
|
||||||
description = "Timeout context manager for asyncio programs"
|
description = "Timeout context manager for asyncio programs"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\""
|
markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\""
|
||||||
files = [
|
files = [
|
||||||
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
|
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
|
||||||
|
|
@ -932,13 +948,118 @@ mypy = ["bokeh", "contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.15.0)", "
|
||||||
test = ["Pillow", "contourpy[test-no-images]", "matplotlib"]
|
test = ["Pillow", "contourpy[test-no-images]", "matplotlib"]
|
||||||
test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"]
|
test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "coverage"
|
||||||
|
version = "7.13.2"
|
||||||
|
description = "Code coverage measurement for Python"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.10"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c"},
|
||||||
|
{file = "coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e"},
|
||||||
|
{file = "coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e"},
|
||||||
|
{file = "coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47"},
|
||||||
|
{file = "coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba"},
|
||||||
|
{file = "coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c"},
|
||||||
|
{file = "coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5"},
|
||||||
|
{file = "coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cryptography"
|
name = "cryptography"
|
||||||
version = "45.0.4"
|
version = "45.0.4"
|
||||||
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
|
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "!=3.9.0,!=3.9.1,>=3.7"
|
python-versions = "!=3.9.0,!=3.9.1,>=3.7"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "cryptography-45.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:425a9a6ac2823ee6e46a76a21a4e8342d8fa5c01e08b823c1f19a8b74f096069"},
|
{file = "cryptography-45.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:425a9a6ac2823ee6e46a76a21a4e8342d8fa5c01e08b823c1f19a8b74f096069"},
|
||||||
{file = "cryptography-45.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:680806cf63baa0039b920f4976f5f31b10e772de42f16310a6839d9f21a26b0d"},
|
{file = "cryptography-45.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:680806cf63baa0039b920f4976f5f31b10e772de42f16310a6839d9f21a26b0d"},
|
||||||
|
|
@ -1132,6 +1253,30 @@ files = [
|
||||||
[package.extras]
|
[package.extras]
|
||||||
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""]
|
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fakeredis"
|
||||||
|
version = "2.33.0"
|
||||||
|
description = "Python implementation of redis API, can be used for testing purposes."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965"},
|
||||||
|
{file = "fakeredis-2.33.0.tar.gz", hash = "sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
redis = {version = ">=4.3", markers = "python_version > \"3.8\""}
|
||||||
|
sortedcontainers = ">=2"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
bf = ["pyprobables (>=0.6)"]
|
||||||
|
cf = ["pyprobables (>=0.6)"]
|
||||||
|
json = ["jsonpath-ng (>=1.6)"]
|
||||||
|
lua = ["lupa (>=2.1)"]
|
||||||
|
probabilistic = ["pyprobables (>=0.6)"]
|
||||||
|
valkey = ["valkey (>=6) ; python_version >= \"3.8\""]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fastapi"
|
name = "fastapi"
|
||||||
version = "0.115.13"
|
version = "0.115.13"
|
||||||
|
|
@ -1309,7 +1454,7 @@ version = "1.7.0"
|
||||||
description = "A list-like structure which implements collections.abc.MutableSequence"
|
description = "A list-like structure which implements collections.abc.MutableSequence"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"},
|
{file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"},
|
||||||
{file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"},
|
{file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"},
|
||||||
|
|
@ -1676,14 +1821,14 @@ test = ["Cython (>=0.29.24)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "httpx"
|
name = "httpx"
|
||||||
version = "0.28.1"
|
version = "0.27.2"
|
||||||
description = "The next generation HTTP client."
|
description = "The next generation HTTP client."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
groups = ["main", "dev"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
|
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
|
||||||
{file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
|
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
|
@ -1691,6 +1836,7 @@ anyio = "*"
|
||||||
certifi = "*"
|
certifi = "*"
|
||||||
httpcore = "==1.*"
|
httpcore = "==1.*"
|
||||||
idna = "*"
|
idna = "*"
|
||||||
|
sniffio = "*"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
|
brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
|
||||||
|
|
@ -1777,6 +1923,18 @@ perf = ["ipython"]
|
||||||
test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"]
|
test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"]
|
||||||
type = ["pytest-mypy"]
|
type = ["pytest-mypy"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iniconfig"
|
||||||
|
version = "2.3.0"
|
||||||
|
description = "brain-dead simple config-ini parsing"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.10"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"},
|
||||||
|
{file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ipdb"
|
name = "ipdb"
|
||||||
version = "0.13.13"
|
version = "0.13.13"
|
||||||
|
|
@ -2360,6 +2518,93 @@ interegular = ["interegular (>=0.3.1,<0.4.0)"]
|
||||||
nearley = ["js2py"]
|
nearley = ["js2py"]
|
||||||
regex = ["regex"]
|
regex = ["regex"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "librt"
|
||||||
|
version = "0.7.8"
|
||||||
|
description = "Mypyc runtime library"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
markers = "platform_python_implementation != \"PyPy\""
|
||||||
|
files = [
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6938cc2de153bc927ed8d71c7d2f2ae01b4e96359126c602721340eb7ce1a92d"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:66daa6ac5de4288a5bbfbe55b4caa7bf0cd26b3269c7a476ffe8ce45f837f87d"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4864045f49dc9c974dadb942ac56a74cd0479a2aafa51ce272c490a82322ea3c"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a36515b1328dc5b3ffce79fe204985ca8572525452eacabee2166f44bb387b2c"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b7e7f140c5169798f90b80d6e607ed2ba5059784968a004107c88ad61fb3641d"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff71447cb778a4f772ddc4ce360e6ba9c95527ed84a52096bd1bbf9fee2ec7c0"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-win32.whl", hash = "sha256:047164e5f68b7a8ebdf9fae91a3c2161d3192418aadd61ddd3a86a56cbe3dc85"},
|
||||||
|
{file = "librt-0.7.8-cp310-cp310-win_amd64.whl", hash = "sha256:d6f254d096d84156a46a84861183c183d30734e52383602443292644d895047c"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3e9c11aa260c31493d4b3197d1e28dd07768594a4f92bec4506849d736248f"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddb52499d0b3ed4aa88746aaf6f36a08314677d5c346234c3987ddc506404eac"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e9c0afebbe6ce177ae8edba0c7c4d626f2a0fc12c33bb993d163817c41a7a05c"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:631599598e2c76ded400c0a8722dec09217c89ff64dc54b060f598ed68e7d2a8"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c1ba843ae20db09b9d5c80475376168feb2640ce91cd9906414f23cc267a1ff"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b5b007bb22ea4b255d3ee39dfd06d12534de2fcc3438567d9f48cdaf67ae1ae3"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dbd79caaf77a3f590cbe32dc2447f718772d6eea59656a7dcb9311161b10fa75"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:87808a8d1e0bd62a01cafc41f0fd6818b5a5d0ca0d8a55326a81643cdda8f873"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-win32.whl", hash = "sha256:31724b93baa91512bd0a376e7cf0b59d8b631ee17923b1218a65456fa9bda2e7"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-win_amd64.whl", hash = "sha256:978e8b5f13e52cf23a9e80f3286d7546baa70bc4ef35b51d97a709d0b28e537c"},
|
||||||
|
{file = "librt-0.7.8-cp311-cp311-win_arm64.whl", hash = "sha256:20e3946863d872f7cabf7f77c6c9d370b8b3d74333d3a32471c50d3a86c0a232"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b6943885b2d49c48d0cff23b16be830ba46b0152d98f62de49e735c6e655a63"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46ef1f4b9b6cc364b11eea0ecc0897314447a66029ee1e55859acb3dd8757c93"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:907ad09cfab21e3c86e8f1f87858f7049d1097f77196959c033612f532b4e592"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2991b6c3775383752b3ca0204842743256f3ad3deeb1d0adc227d56b78a9a850"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03679b9856932b8c8f674e87aa3c55ea11c9274301f76ae8dc4d281bda55cf62"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3968762fec1b2ad34ce57458b6de25dbb4142713e9ca6279a0d352fa4e9f452b"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb7a7807523a31f03061288cc4ffc065d684c39db7644c676b47d89553c0d714"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad64a14b1e56e702e19b24aae108f18ad1bf7777f3af5fcd39f87d0c5a814449"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-win32.whl", hash = "sha256:0241a6ed65e6666236ea78203a73d800dbed896cf12ae25d026d75dc1fcd1dac"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-win_amd64.whl", hash = "sha256:6db5faf064b5bab9675c32a873436b31e01d66ca6984c6f7f92621656033a708"},
|
||||||
|
{file = "librt-0.7.8-cp312-cp312-win_arm64.whl", hash = "sha256:57175aa93f804d2c08d2edb7213e09276bd49097611aefc37e3fa38d1fb99ad0"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4c3995abbbb60b3c129490fa985dfe6cac11d88fc3c36eeb4fb1449efbbb04fc"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:44e0c2cbc9bebd074cf2cdbe472ca185e824be4e74b1c63a8e934cea674bebf2"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4d2f1e492cae964b3463a03dc77a7fe8742f7855d7258c7643f0ee32b6651dd3"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:451e7ffcef8f785831fdb791bd69211f47e95dc4c6ddff68e589058806f044c6"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3469e1af9f1380e093ae06bedcbdd11e407ac0b303a56bbe9afb1d6824d4982d"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f11b300027ce19a34f6d24ebb0a25fd0e24a9d53353225a5c1e6cadbf2916b2e"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4adc73614f0d3c97874f02f2c7fd2a27854e7e24ad532ea6b965459c5b757eca"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60c299e555f87e4c01b2eca085dfccda1dde87f5a604bb45c2906b8305819a93"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-win32.whl", hash = "sha256:b09c52ed43a461994716082ee7d87618096851319bf695d57ec123f2ab708951"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-win_amd64.whl", hash = "sha256:f8f4a901a3fa28969d6e4519deceab56c55a09d691ea7b12ca830e2fa3461e34"},
|
||||||
|
{file = "librt-0.7.8-cp313-cp313-win_arm64.whl", hash = "sha256:43d4e71b50763fcdcf64725ac680d8cfa1706c928b844794a7aa0fa9ac8e5f09"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:be927c3c94c74b05128089a955fba86501c3b544d1d300282cc1b4bd370cb418"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7b0803e9008c62a7ef79058233db7ff6f37a9933b8f2573c05b07ddafa226611"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:79feb4d00b2a4e0e05c9c56df707934f41fcb5fe53fd9efb7549068d0495b758"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9122094e3f24aa759c38f46bd8863433820654927370250f460ae75488b66ea"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e03bea66af33c95ce3addf87a9bf1fcad8d33e757bc479957ddbc0e4f7207ac"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f1ade7f31675db00b514b98f9ab9a7698c7282dad4be7492589109471852d398"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a14229ac62adcf1b90a15992f1ab9c69ae8b99ffb23cb64a90878a6e8a2f5b81"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5bcaaf624fd24e6a0cb14beac37677f90793a96864c67c064a91458611446e83"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-win32.whl", hash = "sha256:7aa7d5457b6c542ecaed79cec4ad98534373c9757383973e638ccced0f11f46d"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-win_amd64.whl", hash = "sha256:3d1322800771bee4a91f3b4bd4e49abc7d35e65166821086e5afd1e6c0d9be44"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314-win_arm64.whl", hash = "sha256:5363427bc6a8c3b1719f8f3845ea53553d301382928a86e8fab7984426949bce"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ca916919793a77e4a98d4a1701e345d337ce53be4a16620f063191f7322ac80f"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:54feb7b4f2f6706bb82325e836a01be805770443e2400f706e824e91f6441dde"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:39a4c76fee41007070f872b648cc2f711f9abf9a13d0c7162478043377b52c8e"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac9c8a458245c7de80bc1b9765b177055efff5803f08e548dd4bb9ab9a8d789b"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b67aa7eff150f075fda09d11f6bfb26edffd300f6ab1666759547581e8f666"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:535929b6eff670c593c34ff435d5440c3096f20fa72d63444608a5aef64dd581"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:63937bd0f4d1cb56653dc7ae900d6c52c41f0015e25aaf9902481ee79943b33a"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf243da9e42d914036fd362ac3fa77d80a41cadcd11ad789b1b5eec4daaf67ca"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-win32.whl", hash = "sha256:171ca3a0a06c643bd0a2f62a8944e1902c94aa8e5da4db1ea9a8daf872685365"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-win_amd64.whl", hash = "sha256:445b7304145e24c60288a2f172b5ce2ca35c0f81605f5299f3fa567e189d2e32"},
|
||||||
|
{file = "librt-0.7.8-cp314-cp314t-win_arm64.whl", hash = "sha256:8766ece9de08527deabcd7cb1b4f1a967a385d26e33e536d6d8913db6ef74f06"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c7e8f88f79308d86d8f39c491773cbb533d6cb7fa6476f35d711076ee04fceb6"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:389bd25a0db916e1d6bcb014f11aa9676cedaa485e9ec3752dfe19f196fd377b"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73fd300f501a052f2ba52ede721232212f3b06503fa12665408ecfc9d8fd149c"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d772edc6a5f7835635c7562f6688e031f0b97e31d538412a852c49c9a6c92d5"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde8a130bd0f239e45503ab39fab239ace094d63ee1d6b67c25a63d741c0f71"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fdec6e2368ae4f796fc72fad7fd4bd1753715187e6d870932b0904609e7c878e"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:00105e7d541a8f2ee5be52caacea98a005e0478cfe78c8080fbb7b5d2b340c63"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c6f8947d3dfd7f91066c5b4385812c18be26c9d5a99ca56667547f2c39149d94"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-win32.whl", hash = "sha256:41d7bb1e07916aeb12ae4a44e3025db3691c4149ab788d0315781b4d29b86afb"},
|
||||||
|
{file = "librt-0.7.8-cp39-cp39-win_amd64.whl", hash = "sha256:e90a8e237753c83b8e484d478d9a996dc5e39fd5bd4c6ce32563bc8123f132be"},
|
||||||
|
{file = "librt-0.7.8.tar.gz", hash = "sha256:1a4ede613941d9c3470b0368be851df6bb78ab218635512d0370b27a277a0862"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mako"
|
name = "mako"
|
||||||
version = "1.3.10"
|
version = "1.3.10"
|
||||||
|
|
@ -2595,7 +2840,7 @@ version = "6.5.0"
|
||||||
description = "multidict implementation"
|
description = "multidict implementation"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "multidict-6.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e118a202904623b1d2606d1c8614e14c9444b59d64454b0c355044058066469"},
|
{file = "multidict-6.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e118a202904623b1d2606d1c8614e14c9444b59d64454b0c355044058066469"},
|
||||||
{file = "multidict-6.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a42995bdcaff4e22cb1280ae7752c3ed3fbb398090c6991a2797a4a0e5ed16a9"},
|
{file = "multidict-6.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a42995bdcaff4e22cb1280ae7752c3ed3fbb398090c6991a2797a4a0e5ed16a9"},
|
||||||
|
|
@ -2709,6 +2954,79 @@ files = [
|
||||||
{file = "multidict-6.5.0.tar.gz", hash = "sha256:942bd8002492ba819426a8d7aefde3189c1b87099cdf18aaaefefcf7f3f7b6d2"},
|
{file = "multidict-6.5.0.tar.gz", hash = "sha256:942bd8002492ba819426a8d7aefde3189c1b87099cdf18aaaefefcf7f3f7b6d2"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mypy"
|
||||||
|
version = "1.19.1"
|
||||||
|
description = "Optional static typing for Python"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec"},
|
||||||
|
{file = "mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b"},
|
||||||
|
{file = "mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6"},
|
||||||
|
{file = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"},
|
||||||
|
{file = "mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1"},
|
||||||
|
{file = "mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac"},
|
||||||
|
{file = "mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288"},
|
||||||
|
{file = "mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab"},
|
||||||
|
{file = "mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6"},
|
||||||
|
{file = "mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331"},
|
||||||
|
{file = "mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925"},
|
||||||
|
{file = "mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042"},
|
||||||
|
{file = "mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1"},
|
||||||
|
{file = "mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e"},
|
||||||
|
{file = "mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2"},
|
||||||
|
{file = "mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8"},
|
||||||
|
{file = "mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a"},
|
||||||
|
{file = "mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13"},
|
||||||
|
{file = "mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250"},
|
||||||
|
{file = "mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b"},
|
||||||
|
{file = "mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e"},
|
||||||
|
{file = "mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef"},
|
||||||
|
{file = "mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75"},
|
||||||
|
{file = "mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd"},
|
||||||
|
{file = "mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1"},
|
||||||
|
{file = "mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718"},
|
||||||
|
{file = "mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b"},
|
||||||
|
{file = "mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045"},
|
||||||
|
{file = "mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957"},
|
||||||
|
{file = "mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f"},
|
||||||
|
{file = "mypy-1.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bcfc336a03a1aaa26dfce9fff3e287a3ba99872a157561cbfcebe67c13308e3"},
|
||||||
|
{file = "mypy-1.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b7951a701c07ea584c4fe327834b92a30825514c868b1f69c30445093fdd9d5a"},
|
||||||
|
{file = "mypy-1.19.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b13cfdd6c87fc3efb69ea4ec18ef79c74c3f98b4e5498ca9b85ab3b2c2329a67"},
|
||||||
|
{file = "mypy-1.19.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f28f99c824ecebcdaa2e55d82953e38ff60ee5ec938476796636b86afa3956e"},
|
||||||
|
{file = "mypy-1.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c608937067d2fc5a4dd1a5ce92fd9e1398691b8c5d012d66e1ddd430e9244376"},
|
||||||
|
{file = "mypy-1.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:409088884802d511ee52ca067707b90c883426bd95514e8cfda8281dc2effe24"},
|
||||||
|
{file = "mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247"},
|
||||||
|
{file = "mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""}
|
||||||
|
mypy_extensions = ">=1.0.0"
|
||||||
|
pathspec = ">=0.9.0"
|
||||||
|
typing_extensions = ">=4.6.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dmypy = ["psutil (>=4.0)"]
|
||||||
|
faster-cache = ["orjson"]
|
||||||
|
install-types = ["pip"]
|
||||||
|
mypyc = ["setuptools (>=50)"]
|
||||||
|
reports = ["lxml"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mypy-extensions"
|
||||||
|
version = "1.1.0"
|
||||||
|
description = "Type system extensions for programs checked with the mypy type checker."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"},
|
||||||
|
{file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mysqlclient"
|
name = "mysqlclient"
|
||||||
version = "2.2.7"
|
version = "2.2.7"
|
||||||
|
|
@ -3226,6 +3544,24 @@ files = [
|
||||||
qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
|
qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
|
||||||
testing = ["docopt", "pytest"]
|
testing = ["docopt", "pytest"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pathspec"
|
||||||
|
version = "1.0.4"
|
||||||
|
description = "Utility library for gitignore style pattern matching of file paths."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"},
|
||||||
|
{file = "pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
hyperscan = ["hyperscan (>=0.7)"]
|
||||||
|
optional = ["typing-extensions (>=4)"]
|
||||||
|
re2 = ["google-re2 (>=1.1)"]
|
||||||
|
tests = ["pytest (>=9)", "typing-extensions (>=4.15)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pexpect"
|
name = "pexpect"
|
||||||
version = "4.9.0"
|
version = "4.9.0"
|
||||||
|
|
@ -3357,6 +3693,22 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-a
|
||||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"]
|
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"]
|
||||||
type = ["mypy (>=1.14.1)"]
|
type = ["mypy (>=1.14.1)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pluggy"
|
||||||
|
version = "1.6.0"
|
||||||
|
description = "plugin and hook calling mechanisms for python"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
|
||||||
|
{file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["pre-commit", "tox"]
|
||||||
|
testing = ["coverage", "pytest", "pytest-benchmark"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "podman-compose"
|
name = "podman-compose"
|
||||||
version = "1.5.0"
|
version = "1.5.0"
|
||||||
|
|
@ -3412,7 +3764,7 @@ version = "0.3.2"
|
||||||
description = "Accelerated property cache"
|
description = "Accelerated property cache"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"},
|
{file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"},
|
||||||
{file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"},
|
{file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"},
|
||||||
|
|
@ -3777,6 +4129,66 @@ files = [
|
||||||
packaging = ">=21.3"
|
packaging = ">=21.3"
|
||||||
Pillow = ">=8.0.0"
|
Pillow = ">=8.0.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest"
|
||||||
|
version = "8.4.2"
|
||||||
|
description = "pytest: simple powerful testing with Python"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"},
|
||||||
|
{file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
|
||||||
|
iniconfig = ">=1"
|
||||||
|
packaging = ">=20"
|
||||||
|
pluggy = ">=1.5,<2"
|
||||||
|
pygments = ">=2.7.2"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest-asyncio"
|
||||||
|
version = "0.23.8"
|
||||||
|
description = "Pytest support for asyncio"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2"},
|
||||||
|
{file = "pytest_asyncio-0.23.8.tar.gz", hash = "sha256:759b10b33a6dc61cce40a8bd5205e302978bbbcc00e279a8b61d9a6a3c82e4d3"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
pytest = ">=7.0.0,<9"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
|
||||||
|
testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest-cov"
|
||||||
|
version = "4.1.0"
|
||||||
|
description = "Pytest plugin for measuring coverage."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"},
|
||||||
|
{file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
coverage = {version = ">=5.2.1", extras = ["toml"]}
|
||||||
|
pytest = ">=4.6"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-dateutil"
|
name = "python-dateutil"
|
||||||
version = "2.9.0.post0"
|
version = "2.9.0.post0"
|
||||||
|
|
@ -4069,7 +4481,7 @@ version = "6.2.0"
|
||||||
description = "Python client for Redis database and key-value store"
|
description = "Python client for Redis database and key-value store"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "redis-6.2.0-py3-none-any.whl", hash = "sha256:c8ddf316ee0aab65f04a11229e94a64b2618451dab7a67cb2f77eb799d872d5e"},
|
{file = "redis-6.2.0-py3-none-any.whl", hash = "sha256:c8ddf316ee0aab65f04a11229e94a64b2618451dab7a67cb2f77eb799d872d5e"},
|
||||||
{file = "redis-6.2.0.tar.gz", hash = "sha256:e821f129b75dde6cb99dd35e5c76e8c49512a5a0d8dfdc560b2fbd44b85ca977"},
|
{file = "redis-6.2.0.tar.gz", hash = "sha256:e821f129b75dde6cb99dd35e5c76e8c49512a5a0d8dfdc560b2fbd44b85ca977"},
|
||||||
|
|
@ -4603,6 +5015,18 @@ files = [
|
||||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sortedcontainers"
|
||||||
|
version = "2.4.0"
|
||||||
|
description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"},
|
||||||
|
{file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "soupsieve"
|
name = "soupsieve"
|
||||||
version = "2.8"
|
version = "2.8"
|
||||||
|
|
@ -5005,6 +5429,37 @@ rich = ">=10.11.0"
|
||||||
shellingham = ">=1.3.0"
|
shellingham = ">=1.3.0"
|
||||||
typing-extensions = ">=3.7.4.3"
|
typing-extensions = ">=3.7.4.3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "types-cffi"
|
||||||
|
version = "1.17.0.20250915"
|
||||||
|
description = "Typing stubs for cffi"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "types_cffi-1.17.0.20250915-py3-none-any.whl", hash = "sha256:cef4af1116c83359c11bb4269283c50f0688e9fc1d7f0eeb390f3661546da52c"},
|
||||||
|
{file = "types_cffi-1.17.0.20250915.tar.gz", hash = "sha256:4362e20368f78dabd5c56bca8004752cc890e07a71605d9e0d9e069dbaac8c06"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
types-setuptools = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "types-pyopenssl"
|
||||||
|
version = "24.1.0.20240722"
|
||||||
|
description = "Typing stubs for pyOpenSSL"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"},
|
||||||
|
{file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
cryptography = ">=35.0.0"
|
||||||
|
types-cffi = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "types-python-dateutil"
|
name = "types-python-dateutil"
|
||||||
version = "2.9.0.20250822"
|
version = "2.9.0.20250822"
|
||||||
|
|
@ -5017,6 +5472,49 @@ files = [
|
||||||
{file = "types_python_dateutil-2.9.0.20250822.tar.gz", hash = "sha256:84c92c34bd8e68b117bff742bc00b692a1e8531262d4507b33afcc9f7716cd53"},
|
{file = "types_python_dateutil-2.9.0.20250822.tar.gz", hash = "sha256:84c92c34bd8e68b117bff742bc00b692a1e8531262d4507b33afcc9f7716cd53"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "types-redis"
|
||||||
|
version = "4.6.0.20241004"
|
||||||
|
description = "Typing stubs for redis"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"},
|
||||||
|
{file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
cryptography = ">=35.0.0"
|
||||||
|
types-pyOpenSSL = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "types-requests"
|
||||||
|
version = "2.32.4.20260107"
|
||||||
|
description = "Typing stubs for requests"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "types_requests-2.32.4.20260107-py3-none-any.whl", hash = "sha256:b703fe72f8ce5b31ef031264fe9395cac8f46a04661a79f7ed31a80fb308730d"},
|
||||||
|
{file = "types_requests-2.32.4.20260107.tar.gz", hash = "sha256:018a11ac158f801bfa84857ddec1650750e393df8a004a8a9ae2a9bec6fcb24f"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
urllib3 = ">=2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "types-setuptools"
|
||||||
|
version = "80.10.0.20260124"
|
||||||
|
description = "Typing stubs for setuptools"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "types_setuptools-80.10.0.20260124-py3-none-any.whl", hash = "sha256:efed7e044f01adb9c2806c7a8e1b6aa3656b8e382379b53d5f26ee3db24d4c01"},
|
||||||
|
{file = "types_setuptools-80.10.0.20260124.tar.gz", hash = "sha256:1b86d9f0368858663276a0cbe5fe5a9722caf94b5acde8aba0399a6e90680f20"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typing-extensions"
|
name = "typing-extensions"
|
||||||
version = "4.14.0"
|
version = "4.14.0"
|
||||||
|
|
@ -5569,7 +6067,7 @@ version = "1.20.1"
|
||||||
description = "Yet another URL library"
|
description = "Yet another URL library"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main", "dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"},
|
{file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"},
|
||||||
{file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"},
|
{file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"},
|
||||||
|
|
@ -5705,4 +6203,4 @@ type = ["pytest-mypy"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = ">3.11"
|
python-versions = ">3.11"
|
||||||
content-hash = "5f53cec7fc3cc93d494341e9fd6562076c1a8952f83075f671a3507c50fcb334"
|
content-hash = "10a74594d9f695ab1077ff992bcd012b93b174b25c3f2ca681d6308653abbd14"
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ matplotlib = "^3.10.0"
|
||||||
opencv-python = "^4.11.0.86"
|
opencv-python = "^4.11.0.86"
|
||||||
click = "^8.2.0"
|
click = "^8.2.0"
|
||||||
aiohttp = "^3.11.18"
|
aiohttp = "^3.11.18"
|
||||||
|
aiohttp-socks = "^0.8.4"
|
||||||
sqlmodel = "^0.0.24"
|
sqlmodel = "^0.0.24"
|
||||||
alembic = "^1.16.1"
|
alembic = "^1.16.1"
|
||||||
sqlalchemy = {extras = ["asyncio"], version = "^2.0.41"}
|
sqlalchemy = {extras = ["asyncio"], version = "^2.0.41"}
|
||||||
|
|
@ -42,6 +43,15 @@ mysqlclient = "^2.2.7"
|
||||||
ipdb = "^0.13.13"
|
ipdb = "^0.13.13"
|
||||||
jupyterlab = "^4.4.7"
|
jupyterlab = "^4.4.7"
|
||||||
podman-compose = "^1.5.0"
|
podman-compose = "^1.5.0"
|
||||||
|
pytest = "^8.0.0"
|
||||||
|
pytest-asyncio = "^0.23.0"
|
||||||
|
pytest-cov = "^4.1.0"
|
||||||
|
httpx = "^0.27.0"
|
||||||
|
aioresponses = "^0.7.6"
|
||||||
|
fakeredis = "^2.21.0"
|
||||||
|
mypy = "^1.8.0"
|
||||||
|
types-requests = "^2.31.0"
|
||||||
|
types-redis = "^4.6.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core>=1.0.0"]
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
|
@ -53,3 +63,22 @@ lint.ignore = [
|
||||||
"E741", # Ambiguous name
|
"E741", # Ambiguous name
|
||||||
]
|
]
|
||||||
exclude = ["*.ipynb"]
|
exclude = ["*.ipynb"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
asyncio_mode = "auto"
|
||||||
|
testpaths = ["tests"]
|
||||||
|
asyncio_default_fixture_loop_scope = "function"
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.11"
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_ignores = true
|
||||||
|
disallow_untyped_defs = true
|
||||||
|
disallow_incomplete_defs = true
|
||||||
|
check_untyped_defs = true
|
||||||
|
strict_optional = true
|
||||||
|
plugins = ["pydantic.mypy"]
|
||||||
|
|
||||||
|
[[tool.mypy.overrides]]
|
||||||
|
module = ["transformers.*", "pytesseract.*", "cv2.*", "celery.*", "tqdm.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "geopy.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*"]
|
||||||
|
ignore_missing_imports = true
|
||||||
137
crawler/rec/circuit_breaker.py
Normal file
137
crawler/rec/circuit_breaker.py
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
"""Circuit breaker pattern for protecting against cascading failures."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import enum
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from rec.exceptions import CircuitBreakerOpenError
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
|
||||||
|
class CircuitState(enum.Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation.
    CLOSED = "closed"
    # Too many failures; requests are being blocked.
    OPEN = "open"
    # Probing whether the service has recovered.
    HALF_OPEN = "half_open"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CircuitBreaker:
    """Failure-counting gate that blocks requests after repeated errors.

    State machine, as driven by the methods below:
    - CLOSED: ``call()`` lets requests through and ``record_failure()``
      counts failures; reaching ``failure_threshold`` opens the circuit.
    - OPEN: ``call()`` raises ``CircuitBreakerOpenError`` until
      ``recovery_timeout`` seconds have passed since the last recorded
      failure, at which point it moves the circuit to HALF_OPEN.
    - HALF_OPEN: ``call()`` does not block; the next recorded success closes
      the circuit and the next recorded failure reopens it.

    Attributes:
        failure_threshold: Number of consecutive failures before opening.
        recovery_timeout: Seconds to wait before attempting half-open state.
        state: Current circuit state.
        failure_count: Count of consecutive failures.
        last_failure_time: ``time.time()`` value of the last recorded failure.
        last_state_change: ``time.time()`` value of the last state change.
    """

    failure_threshold: int
    recovery_timeout: float
    state: CircuitState = CircuitState.CLOSED
    failure_count: int = 0
    last_failure_time: float = 0.0
    last_state_change: float = 0.0

    def __post_init__(self) -> None:
        """Stamp construction time as the first state change."""
        self.last_state_change = time.time()

    def _stamp_state(self, new_state: CircuitState) -> None:
        """Store ``new_state`` and record when the change happened."""
        self.state = new_state
        self.last_state_change = time.time()

    def call(self) -> None:
        """Gate a request: return silently if allowed, raise if blocked.

        Raises:
            CircuitBreakerOpenError: If the circuit is open and the recovery
                timeout has not yet elapsed.
        """
        now = time.time()

        if self.state is not CircuitState.OPEN:
            # CLOSED and HALF_OPEN never block.
            return

        elapsed = now - self.last_failure_time
        if elapsed >= self.recovery_timeout:
            # Enough quiet time has passed; let this request probe the service.
            self._transition_to_half_open()
            return

        remaining = self.recovery_timeout - elapsed
        raise CircuitBreakerOpenError(
            f"Circuit breaker is open. Waiting {remaining:.1f}s before retry."
        )

    def record_success(self) -> None:
        """Note a successful request and clear the failure streak."""
        if self.state is CircuitState.HALF_OPEN:
            # The probe succeeded, so the service is considered recovered.
            self._transition_to_closed()

        self.failure_count = 0

    def record_failure(self) -> None:
        """Note a failed request and open the circuit when warranted."""
        self.failure_count += 1
        self.last_failure_time = time.time()

        probe_failed = self.state is CircuitState.HALF_OPEN
        threshold_reached = (
            self.state is CircuitState.CLOSED
            and self.failure_count >= self.failure_threshold
        )
        if probe_failed or threshold_reached:
            self._transition_to_open()

    def _transition_to_open(self) -> None:
        """Move to OPEN and log the failure streak that caused it."""
        self._stamp_state(CircuitState.OPEN)
        logger.warning(
            f"Circuit breaker OPENED after {self.failure_count} consecutive failures. "
            f"Will retry in {self.recovery_timeout}s"
        )

    def _transition_to_half_open(self) -> None:
        """Move to HALF_OPEN so recovery can be tested."""
        self._stamp_state(CircuitState.HALF_OPEN)
        logger.info("Circuit breaker entering HALF_OPEN state, testing service recovery")

    def _transition_to_closed(self) -> None:
        """Move to CLOSED and clear the failure streak."""
        self._stamp_state(CircuitState.CLOSED)
        self.failure_count = 0
        logger.info("Circuit breaker CLOSED, service recovered")

    def reset(self) -> None:
        """Force the breaker back to CLOSED and clear all failure bookkeeping."""
        self._stamp_state(CircuitState.CLOSED)
        self.failure_count = 0
        self.last_failure_time = 0.0
        logger.info("Circuit breaker manually reset to CLOSED state")

    @property
    def is_open(self) -> bool:
        """Whether the circuit is currently OPEN."""
        return self.state is CircuitState.OPEN

    @property
    def is_closed(self) -> bool:
        """Whether the circuit is currently CLOSED."""
        return self.state is CircuitState.CLOSED

    @property
    def is_half_open(self) -> bool:
        """Whether the circuit is currently HALF_OPEN."""
        return self.state is CircuitState.HALF_OPEN
|
||||||
74
crawler/rec/exceptions.py
Normal file
74
crawler/rec/exceptions.py
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
"""Custom exceptions for Rightmove API errors."""
|
||||||
|
|
||||||
|
|
||||||
|
class RightmoveAPIError(Exception):
    """Root of the exception hierarchy for Rightmove API errors."""
|
||||||
|
|
||||||
|
|
||||||
|
class ThrottlingError(RightmoveAPIError):
    """Common base for throttling-related errors.

    Signals that Rightmove is limiting our requests and that the caller
    should back off.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimitError(ThrottlingError):
    """HTTP 429 (Too Many Requests).

    Rightmove is explicitly rate limiting our requests.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceUnavailableError(ThrottlingError):
    """HTTP 503 (Service Unavailable).

    Rightmove's service is temporarily unavailable, possibly because it is
    overloaded.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class IPBlockedError(ThrottlingError):
    """HTTP 403 (Forbidden), treated as an IP block.

    Our IP may have been blocked or blacklisted by Rightmove.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class SlowResponseError(ThrottlingError):
    """The response took longer than the configured threshold.

    A very slow API response is taken as a sign of potential throttling or
    overload.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class UnexpectedEmptyResponseError(RightmoveAPIError):
    """An empty response arrived where data was expected."""
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidResponseError(RightmoveAPIError):
    """The response carries error messages or otherwise invalid data."""
|
||||||
|
|
||||||
|
|
||||||
|
class CircuitBreakerOpenError(RightmoveAPIError):
    """The circuit breaker is open and is blocking requests.

    Raised when the breaker has seen too many failures and refuses further
    requests so that the service has time to recover.
    """
|
||||||
|
|
@ -1,16 +1,38 @@
|
||||||
import enum
|
import enum
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from models.listing import FurnishType, ListingType
|
from models.listing import FurnishType, ListingType
|
||||||
from rec import districts
|
from rec import districts
|
||||||
from tenacity import retry, stop_after_attempt, wait_random
|
from rec.exceptions import (
|
||||||
|
CircuitBreakerOpenError,
|
||||||
|
ThrottlingError,
|
||||||
|
)
|
||||||
|
from rec.throttle_detector import get_throttle_metrics, validate_response
|
||||||
|
from rec.circuit_breaker import CircuitBreaker
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
retry_if_exception_type,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
wait_random,
|
||||||
|
)
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
# Global circuit breaker instance
|
||||||
|
_circuit_breaker: CircuitBreaker | None = None
|
||||||
|
|
||||||
|
|
||||||
headers = {
|
DEFAULT_HEADERS = {
|
||||||
"Host": "api.rightmove.co.uk",
|
"Host": "api.rightmove.co.uk",
|
||||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
"User-Agent": "okhttp/4.12.0",
|
||||||
"User-Agent": "okhttp/4.10.0",
|
"Connection": "keep-alive",
|
||||||
"Connection": "close",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -24,24 +46,171 @@ class PropertyType(enum.StrEnum):
|
||||||
TERRACED = "terraced"
|
TERRACED = "terraced"
|
||||||
|
|
||||||
|
|
||||||
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
|
@asynccontextmanager
async def create_session(
    config: ScraperConfig | None = None,
) -> AsyncIterator[aiohttp.ClientSession]:
    """Create an aiohttp session with optional proxy support.

    The session is created with ``trust_env=True`` and ``DEFAULT_HEADERS``,
    and is closed when the context exits.

    Args:
        config: Scraper configuration. Loads from environment if not provided.

    Yields:
        Configured aiohttp ClientSession.

    Raises:
        ImportError: If ``config.proxy_url`` is set but ``aiohttp-socks`` is
            not installed.
    """
    if config is None:
        config = ScraperConfig.from_env()

    connector = None
    if config.proxy_url:
        # Imported lazily so aiohttp-socks is only needed when a proxy is set.
        try:
            from aiohttp_socks import ProxyConnector
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "aiohttp-socks is required for proxy support. "
                "Install with: pip install aiohttp-socks"
            ) from exc

        connector = ProxyConnector.from_url(config.proxy_url)

    # `async with` closes the session on exit, including on error.
    async with aiohttp.ClientSession(
        trust_env=True,
        connector=connector,
        headers=DEFAULT_HEADERS,
    ) as session:
        yield session
|
||||||
|
|
||||||
|
|
||||||
|
def get_circuit_breaker(config: ScraperConfig | None = None) -> CircuitBreaker | None:
    """Return the module-wide circuit breaker, creating it on first use.

    Args:
        config: Configuration used to decide whether the breaker is enabled
            and, on first creation, to set its thresholds. Loaded from the
            environment when omitted.

    Returns:
        The shared CircuitBreaker if enabled in ``config``, otherwise None.
    """
    global _circuit_breaker

    settings = ScraperConfig.from_env() if config is None else config
    if not settings.enable_circuit_breaker:
        return None

    if _circuit_breaker is not None:
        # Already built by an earlier call; its settings are left as they are.
        return _circuit_breaker

    _circuit_breaker = CircuitBreaker(
        failure_threshold=settings.circuit_breaker_failure_threshold,
        recovery_timeout=settings.circuit_breaker_recovery_timeout,
    )
    return _circuit_breaker
|
||||||
|
|
||||||
|
|
||||||
|
def reset_circuit_breaker() -> None:
    """Reset the shared circuit breaker, if one has been created."""
    breaker = _circuit_breaker
    if breaker is None:
        return
    breaker.reset()
|
||||||
|
|
||||||
|
|
||||||
|
def check_circuit_breaker(config: ScraperConfig | None = None) -> None:
    """Ask the shared circuit breaker whether a request may proceed.

    Does nothing when ``get_circuit_breaker`` returns None (breaker disabled).

    Args:
        config: Configuration for the circuit breaker.

    Raises:
        CircuitBreakerOpenError: If the circuit is open.
    """
    breaker = get_circuit_breaker(config)
    if breaker is None:
        return
    breaker.call()
|
||||||
|
|
||||||
|
|
||||||
|
@retry(
    retry=retry_if_exception_type(ThrottlingError),
    wait=wait_exponential(multiplier=2, min=2, max=120),
    stop=stop_after_attempt(5),
)
async def detail_query(
    detail_id: int,
    session: aiohttp.ClientSession | None = None,
    config: ScraperConfig | None = None,
) -> dict[str, Any]:
    """Fetch detailed property information.

    Attempts that fail with ``ThrottlingError`` are retried with exponential
    backoff, up to five attempts in total. Each attempt first checks the
    circuit breaker and reports its outcome back to it.

    Args:
        detail_id: The property identifier.
        session: Optional aiohttp session. When omitted, a temporary one is
            built with ``create_session`` so the configured proxy and default
            headers still apply.
        config: Scraper configuration. Loads from environment if not provided.

    Returns:
        Property details as a dictionary.

    Raises:
        CircuitBreakerOpenError: If the circuit breaker is open.
        ThrottlingError: Raised by response validation on an attempt; this
            is what triggers a retry.
        tenacity.RetryError: Once all attempts are throttled; the decorator
            does not set ``reraise=True``, so the last ThrottlingError is
            wrapped rather than re-raised.
        Exception: On a non-200 status that validation did not reject.
    """
    if config is None:
        config = ScraperConfig.from_env()

    check_circuit_breaker(config)
    cb = get_circuit_breaker(config)

    params = {
        "apiApplication": "ANDROID",
        "appVersion": "3.70.0",
    }
    url = f"https://api.rightmove.co.uk/api/property/{detail_id}"

    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
        """Run one GET, validate it, and report the outcome to the breaker."""
        start_time = time.time()
        try:
            async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
                response_time = time.time() - start_time
                body = await response.json() if response.status == 200 else None

                # Validate response for throttling
                validate_response(
                    response,
                    response_time,
                    body,
                    config.slow_response_threshold,
                    expect_data=True,
                )

                if response.status != 200:
                    raise Exception(
                        f"""id: {detail_id}. Status Code: {response.status}."""
                        f"""Failed due to: {await response.text()}"""
                    )

                if cb is not None:
                    cb.record_success()
                return body  # type: ignore
        except Exception:
            # Throttling and every other failure count against the breaker;
            # a bare raise keeps the original exception and traceback.
            if cb is not None:
                cb.record_failure()
            raise

    if session is not None:
        return await do_request(session)

    # No caller-supplied session: use the shared factory rather than a bare
    # ClientSession, which would ignore config.proxy_url.
    async with create_session(config) as new_session:
        return await do_request(new_session)
|
||||||
|
|
||||||
|
|
||||||
@retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
|
@retry(
|
||||||
|
retry=retry_if_exception_type(ThrottlingError),
|
||||||
|
wait=wait_exponential(multiplier=2, min=2, max=120),
|
||||||
|
stop=stop_after_attempt(5),
|
||||||
|
)
|
||||||
async def listing_query(
|
async def listing_query(
|
||||||
*,
|
*,
|
||||||
page: int,
|
page: int,
|
||||||
|
|
@ -57,7 +226,41 @@ async def listing_query(
|
||||||
property_type: list[PropertyType] = [],
|
property_type: list[PropertyType] = [],
|
||||||
page_size: int = 25,
|
page_size: int = 25,
|
||||||
furnish_types: list[FurnishType] = [],
|
furnish_types: list[FurnishType] = [],
|
||||||
|
session: aiohttp.ClientSession | None = None,
|
||||||
|
config: ScraperConfig | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
"""Execute a listing search query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
page: Page number to fetch (1-indexed).
|
||||||
|
channel: Listing type (BUY or RENT).
|
||||||
|
min_bedrooms: Minimum number of bedrooms.
|
||||||
|
max_bedrooms: Maximum number of bedrooms.
|
||||||
|
radius: Search radius.
|
||||||
|
min_price: Minimum price.
|
||||||
|
max_price: Maximum price.
|
||||||
|
district: District identifier string.
|
||||||
|
mustNewHome: Filter for new homes only (BUY only).
|
||||||
|
max_days_since_added: Maximum days since listing was added (BUY only).
|
||||||
|
property_type: List of property types to filter (BUY only).
|
||||||
|
page_size: Number of results per page (default 25).
|
||||||
|
furnish_types: List of furnish types to filter (RENT only).
|
||||||
|
session: Optional aiohttp session. Creates new one if not provided.
|
||||||
|
config: Scraper configuration. Loads from environment if not provided.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
API response as a dictionary.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
CircuitBreakerOpenError: If the circuit breaker is open.
|
||||||
|
ThrottlingError: If the request is throttled.
|
||||||
|
"""
|
||||||
|
if config is None:
|
||||||
|
config = ScraperConfig.from_env()
|
||||||
|
|
||||||
|
check_circuit_breaker(config)
|
||||||
|
cb = get_circuit_breaker(config)
|
||||||
|
|
||||||
params: dict[str, str] = {
|
params: dict[str, str] = {
|
||||||
"locationIdentifier": districts.get_districts()[district],
|
"locationIdentifier": districts.get_districts()[district],
|
||||||
"channel": str(channel).upper(),
|
"channel": str(channel).upper(),
|
||||||
|
|
@ -95,19 +298,171 @@ async def listing_query(
|
||||||
if furnish_types:
|
if furnish_types:
|
||||||
params["furnishTypes"] = ",".join(furnish_types)
|
params["furnishTypes"] = ",".join(furnish_types)
|
||||||
|
|
||||||
headers = {
|
request_headers = {
|
||||||
"Host": "api.rightmove.co.uk",
|
"Host": "api.rightmove.co.uk",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
"User-Agent": "okhttp/4.12.0",
|
"User-Agent": "okhttp/4.12.0",
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
}
|
}
|
||||||
|
|
||||||
async with aiohttp.ClientSession(trust_env=True) as session:
|
async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
async with s.get(
|
||||||
|
"https://api.rightmove.co.uk/api/property-listing",
|
||||||
|
params=params,
|
||||||
|
headers=request_headers,
|
||||||
|
) as response:
|
||||||
|
response_time = time.time() - start_time
|
||||||
|
body = await response.json() if response.status == 200 else None
|
||||||
|
|
||||||
|
# Validate response for throttling
|
||||||
|
validate_response(
|
||||||
|
response,
|
||||||
|
response_time,
|
||||||
|
body,
|
||||||
|
config.slow_response_threshold,
|
||||||
|
expect_data=(page == 1), # Only expect data on first page
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status != 200:
|
||||||
|
raise Exception(f"Failed due to: {await response.text()}")
|
||||||
|
|
||||||
|
if cb is not None:
|
||||||
|
cb.record_success()
|
||||||
|
return body # type: ignore
|
||||||
|
except ThrottlingError:
|
||||||
|
if cb is not None:
|
||||||
|
cb.record_failure()
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
if cb is not None:
|
||||||
|
cb.record_failure()
|
||||||
|
raise e
|
||||||
|
|
||||||
|
if session:
|
||||||
|
return await do_request(session)
|
||||||
|
else:
|
||||||
|
async with aiohttp.ClientSession(trust_env=True) as new_session:
|
||||||
|
return await do_request(new_session)
|
||||||
|
|
||||||
|
|
||||||
|
@retry(
    retry=retry_if_exception_type(ThrottlingError),
    wait=wait_exponential(multiplier=2, min=2, max=60),
    stop=stop_after_attempt(5),
)
async def probe_query(
    *,
    session: aiohttp.ClientSession,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,
    max_days_since_added: int = 30,
    furnish_types: list[FurnishType] | None = None,
    config: ScraperConfig | None = None,
) -> dict[str, Any]:
    """Probe the API to get result count without fetching full results.

    Makes a minimal request (page 1, one property per page) so the caller can
    read ``totalAvailableResults`` cheaply. Retried on ThrottlingError by the
    decorator above.

    Args:
        session: aiohttp session for making requests.
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: District identifier string.
        max_days_since_added: Maximum days since listing was added (BUY only).
            Only the values 1, 3, 7 and 14 are forwarded to the API.
        furnish_types: List of furnish types to filter (RENT only).
        config: Scraper configuration. Loads from environment if not provided.

    Returns:
        API response containing totalAvailableResults.

    Raises:
        CircuitBreakerOpenError: If the circuit breaker is open.
        ThrottlingError: If the request is throttled.
        Exception: If the API answers with a non-200 status that
            validate_response did not already reject.
    """
    if config is None:
        config = ScraperConfig.from_env()

    check_circuit_breaker(config)
    cb = get_circuit_breaker(config)

    params: dict[str, str] = {
        "locationIdentifier": districts.get_districts()[district],
        "channel": str(channel).upper(),
        "page": "1",
        "numberOfPropertiesPerPage": "1",  # Minimal page size for probing
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "4.28.0",
    }

    if channel is ListingType.BUY:
        params["dontShow"] = "sharedOwnership,retirement"
        # Any other value (including None) is silently left out of the query.
        if max_days_since_added in (1, 3, 7, 14):
            params["maxDaysSinceAdded"] = str(max_days_since_added)

    if channel is ListingType.RENT and furnish_types:
        params["furnishTypes"] = ",".join(furnish_types)

    request_headers = {
        "Host": "api.rightmove.co.uk",
        "Accept-Encoding": "gzip, deflate, br",
        "User-Agent": "okhttp/4.12.0",
        "Connection": "keep-alive",
    }

    start_time = time.time()
    try:
        async with session.get(
            "https://api.rightmove.co.uk/api/property-listing",
            params=params,
            headers=request_headers,
        ) as response:
            response_time = time.time() - start_time
            # Only a 200 response is parsed as JSON; error bodies are read as
            # text further down.
            body = await response.json() if response.status == 200 else None

            # Validate response for throttling
            validate_response(
                response,
                response_time,
                body,
                config.slow_response_threshold,
                expect_data=False,  # Probe doesn't need data, just count
            )

            if response.status != 200:
                raise Exception(f"Probe failed: {await response.text()}")

            if cb is not None:
                cb.record_success()
            return body  # type: ignore
    except Exception:
        # Throttling and every other failure are reported to the circuit
        # breaker identically; a bare raise keeps the original traceback.
        if cb is not None:
            cb.record_failure()
        raise
|
||||||
|
|
|
||||||
232
crawler/rec/throttle_detector.py
Normal file
232
crawler/rec/throttle_detector.py
Normal file
|
|
@ -0,0 +1,232 @@
|
||||||
|
"""Throttling detection and metrics for Rightmove API."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from rec.exceptions import (
|
||||||
|
InvalidResponseError,
|
||||||
|
IPBlockedError,
|
||||||
|
RateLimitError,
|
||||||
|
ServiceUnavailableError,
|
||||||
|
SlowResponseError,
|
||||||
|
UnexpectedEmptyResponseError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ThrottleMetrics:
    """Tracks throttling events and request metrics.

    Only ``record_request`` and ``record_slow_response`` carry a response
    time, so only they feed ``total_requests`` / ``total_response_time``.
    Failures recorded without a time (429/503/403, empty and invalid
    responses) are added back in by ``total_attempts``, which is the
    denominator used for ``throttle_rate``.

    Attributes:
        rate_limit_count: Number of HTTP 429 errors.
        service_unavailable_count: Number of HTTP 503 errors.
        ip_blocked_count: Number of HTTP 403 errors.
        slow_response_count: Number of slow responses.
        empty_response_count: Number of unexpected empty responses.
        invalid_response_count: Number of invalid/error responses.
        total_requests: Number of requests recorded with a response time
            (successful and slow responses).
        total_response_time: Cumulative response time in seconds.
    """

    rate_limit_count: int = 0
    service_unavailable_count: int = 0
    ip_blocked_count: int = 0
    slow_response_count: int = 0
    empty_response_count: int = 0
    invalid_response_count: int = 0
    total_requests: int = 0
    total_response_time: float = 0.0
    _start_time: float = field(default_factory=time.time)

    def record_rate_limit(self) -> None:
        """Record a rate limit error (HTTP 429)."""
        self.rate_limit_count += 1

    def record_service_unavailable(self) -> None:
        """Record a service unavailable error (HTTP 503)."""
        self.service_unavailable_count += 1

    def record_ip_blocked(self) -> None:
        """Record an IP blocked error (HTTP 403)."""
        self.ip_blocked_count += 1

    def record_slow_response(self, response_time: float) -> None:
        """Record a slow response.

        Args:
            response_time: Response time in seconds.
        """
        self.slow_response_count += 1
        self.total_response_time += response_time
        self.total_requests += 1

    def record_empty_response(self) -> None:
        """Record an unexpected empty response."""
        self.empty_response_count += 1

    def record_invalid_response(self) -> None:
        """Record an invalid or error response."""
        self.invalid_response_count += 1

    def record_request(self, response_time: float) -> None:
        """Record a successful request.

        Args:
            response_time: Response time in seconds.
        """
        self.total_requests += 1
        self.total_response_time += response_time

    @property
    def average_response_time(self) -> float:
        """Average response time in seconds over the timed requests."""
        if self.total_requests == 0:
            return 0.0
        return self.total_response_time / self.total_requests

    @property
    def total_throttling_events(self) -> int:
        """Total number of throttling events (429, 503, 403 and slow)."""
        return (
            self.rate_limit_count
            + self.service_unavailable_count
            + self.ip_blocked_count
            + self.slow_response_count
        )

    @property
    def total_attempts(self) -> int:
        """Every request observed, whether or not a time was recorded.

        Slow responses are already part of ``total_requests``, so only the
        failure counters that never touch ``total_requests`` are added.
        """
        return (
            self.total_requests
            + self.rate_limit_count
            + self.service_unavailable_count
            + self.ip_blocked_count
            + self.empty_response_count
            + self.invalid_response_count
        )

    @property
    def throttle_rate(self) -> float:
        """Percentage (0-100) of observed requests that were throttled."""
        attempts = self.total_attempts
        if attempts == 0:
            return 0.0
        # Dividing by total_requests alone would ignore throttled requests in
        # the denominator and could report more than 100%.
        return (self.total_throttling_events / attempts) * 100

    @property
    def elapsed_time(self) -> float:
        """Time elapsed since metrics started tracking."""
        return time.time() - self._start_time

    def summary(self) -> str:
        """Generate a multi-line, human-readable summary of the metrics."""
        return (
            f"Throttle Metrics Summary:\n"
            f" Total Requests: {self.total_attempts}\n"
            f" Total Throttling Events: {self.total_throttling_events}\n"
            f" Throttle Rate: {self.throttle_rate:.2f}%\n"
            f" Rate Limit (429): {self.rate_limit_count}\n"
            f" Service Unavailable (503): {self.service_unavailable_count}\n"
            f" IP Blocked (403): {self.ip_blocked_count}\n"
            f" Slow Responses: {self.slow_response_count}\n"
            f" Empty Responses: {self.empty_response_count}\n"
            f" Invalid Responses: {self.invalid_response_count}\n"
            f" Average Response Time: {self.average_response_time:.2f}s\n"
            f" Elapsed Time: {self.elapsed_time:.2f}s"
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Global metrics instance
|
||||||
|
# Created on first use by get_throttle_metrics(); replaced with a fresh
# instance by reset_throttle_metrics().
_global_metrics: ThrottleMetrics | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_throttle_metrics() -> ThrottleMetrics:
    """Return the module-wide ThrottleMetrics, creating it on first use.

    Returns:
        Global ThrottleMetrics instance.
    """
    global _global_metrics
    current = _global_metrics
    if current is None:
        # First caller since import: build the shared instance and store it.
        current = ThrottleMetrics()
        _global_metrics = current
    return current
|
||||||
|
|
||||||
|
|
||||||
|
def reset_throttle_metrics() -> None:
    """Replace the global throttle metrics with a fresh, zeroed instance."""
    global _global_metrics
    fresh_metrics = ThrottleMetrics()
    _global_metrics = fresh_metrics
|
||||||
|
|
||||||
|
|
||||||
|
def validate_response(
    response: aiohttp.ClientResponse,
    response_time: float,
    response_body: dict[str, Any] | None,
    slow_response_threshold: float,
    expect_data: bool = True,
) -> None:
    """Check an API response for signs of throttling and record metrics.

    Checks run in this order: throttling status codes, response time, error
    markers in the body, then an unexpectedly empty result page. The first
    failing check records its metric and raises; if none fails the request is
    recorded as successful.

    Args:
        response: The aiohttp response object (only ``status`` is read).
        response_time: Time taken for the request in seconds.
        response_body: Parsed JSON response body, or None if unavailable.
        slow_response_threshold: Threshold in seconds for slow responses.
        expect_data: Whether the body is expected to contain properties.

    Raises:
        RateLimitError: If HTTP 429 is returned.
        ServiceUnavailableError: If HTTP 503 is returned.
        IPBlockedError: If HTTP 403 is returned.
        SlowResponseError: If response time exceeds threshold.
        UnexpectedEmptyResponseError: If response is empty when data is expected.
        InvalidResponseError: If response contains error messages.
    """
    metrics = get_throttle_metrics()

    # status code -> (metric recorder, exception type, message prefix)
    throttled_statuses = {
        429: (
            metrics.record_rate_limit,
            RateLimitError,
            "Rate limit exceeded (HTTP 429). ",
        ),
        503: (
            metrics.record_service_unavailable,
            ServiceUnavailableError,
            "Service unavailable (HTTP 503). ",
        ),
        403: (
            metrics.record_ip_blocked,
            IPBlockedError,
            "Access forbidden, possible IP block (HTTP 403). ",
        ),
    }
    matched = throttled_statuses.get(response.status)
    if matched is not None:
        record_event, error_type, prefix = matched
        record_event()
        raise error_type(f"{prefix}Response time: {response_time:.2f}s")

    # A response slower than the threshold is treated as a throttling signal.
    if response_time > slow_response_threshold:
        metrics.record_slow_response(response_time)
        raise SlowResponseError(
            f"Slow response detected: {response_time:.2f}s "
            f"(threshold: {slow_response_threshold}s)"
        )

    if response_body is not None:
        # Error marker either as a top-level key or anywhere in the payload.
        has_error_marker = "error" in response_body or "GENERIC_ERROR" in str(
            response_body
        )
        if has_error_marker:
            metrics.record_invalid_response()
            raise InvalidResponseError(f"Error in response body: {response_body}")

        if expect_data:
            listed = response_body.get("properties", [])
            available = response_body.get("totalAvailableResults", 0)

            # The API claims results exist but returned none on this page.
            if available > 0 and len(listed) == 0:
                metrics.record_empty_response()
                raise UnexpectedEmptyResponseError(
                    f"Expected data but got empty response. "
                    f"Total available: {available}"
                )

    # Nothing suspicious: count it as a normal request.
    metrics.record_request(response_time)
|
||||||
99
crawler/services/listing_cache.py
Normal file
99
crawler/services/listing_cache.py
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
"""Redis-based caching for listing GeoJSON query results."""
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Generator
|
||||||
|
|
||||||
|
import redis
|
||||||
|
|
||||||
|
from models.listing import QueryParameters
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
# Prefix of every cache key built by make_cache_key(); invalidate_cache()
# scans for keys matching this prefix.
CACHE_PREFIX = "listings:geojson:"
|
||||||
|
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
|
||||||
|
# Redis database number used for the cache; _get_redis_client() substitutes
# it into the Celery broker URL.
CACHE_DB = 2
|
||||||
|
|
||||||
|
|
||||||
|
def _get_redis_client() -> redis.Redis:
    """Get a Redis client for the listing cache.

    Reuses the Celery broker URL (``CELERY_BROKER_URL``, defaulting to
    ``redis://localhost:6379/0``) but points it at database ``CACHE_DB``.

    Returns:
        A Redis client with ``decode_responses=True``.
    """
    from urllib.parse import urlsplit, urlunsplit

    broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
    # Replace only the path component. Splitting on the last "/" corrupts a
    # URL that has no db path (e.g. "redis://host:6379") and drops any query
    # string that follows the db number.
    # NOTE(review): unix-socket URLs keep the socket in the path and are not
    # handled here (they were not handled before either) - confirm none are used.
    parts = urlsplit(broker_url)
    cache_url = urlunsplit(parts._replace(path=f"/{CACHE_DB}"))
    return redis.from_url(cache_url, decode_responses=True)
|
||||||
|
|
||||||
|
|
||||||
|
def make_cache_key(query_params: QueryParameters) -> str:
    """Build the cache key for a set of query parameters.

    The key is ``CACHE_PREFIX`` followed by the first 16 hex characters of
    the SHA-256 digest of the parameters' JSON dump.
    """
    serialized = query_params.model_dump_json().encode()
    digest = hashlib.sha256(serialized).hexdigest()
    return f"{CACHE_PREFIX}{digest[:16]}"
|
||||||
|
|
||||||
|
|
||||||
|
def get_cached_count(query_params: QueryParameters) -> int | None:
    """Return the number of cached features for a query, or None if not cached.

    Redis errors are logged as warnings and reported as "not cached".
    """
    try:
        client = _get_redis_client()
        key = make_cache_key(query_params)
        # LLEN answers 0 for a missing key and Redis never stores an empty
        # list, so one call covers both "exists?" and "how many?". This also
        # removes the window in which the key could expire between two calls.
        count = client.llen(key)
    except redis.RedisError as e:
        logger.warning(f"Redis cache read error: {e}")
        return None
    return count if count else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_cached_features(
    query_params: QueryParameters, batch_size: int = 50
) -> Generator[list[dict], None, None]:
    """Yield the cached GeoJSON features for a query in batches.

    Reads the cached list in slices of ``batch_size`` and JSON-decodes each
    entry. Empty slices are skipped. A Redis error ends the stream with a
    logged warning instead of raising.
    """
    try:
        redis_client = _get_redis_client()
        cache_key = make_cache_key(query_params)
        cached_total = redis_client.llen(cache_key)

        for first in range(0, cached_total, batch_size):
            # LRANGE bounds are inclusive, hence the "- 1".
            last = first + batch_size - 1
            raw_items = redis_client.lrange(cache_key, first, last)
            decoded = list(map(json.loads, raw_items))
            if not decoded:
                continue
            yield decoded
    except redis.RedisError as e:
        logger.warning(f"Redis cache read error during streaming: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
    """Append a batch of features to the query's cache list.

    Each feature is stored as a JSON string and the key's TTL is set to
    ``CACHE_TTL_SECONDS`` in the same pipeline. Does nothing for an empty
    batch. Redis errors are logged as warnings, not raised.
    """
    if not features:
        return

    try:
        redis_client = _get_redis_client()
        cache_key = make_cache_key(query_params)
        pipe = redis_client.pipeline()
        for payload in map(json.dumps, features):
            pipe.rpush(cache_key, payload)
        # Set/refresh TTL
        pipe.expire(cache_key, CACHE_TTL_SECONDS)
        pipe.execute()
    except redis.RedisError as e:
        logger.warning(f"Redis cache write error: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def invalidate_cache() -> None:
    """Delete all listing GeoJSON cache entries.

    Scans for keys starting with ``CACHE_PREFIX`` and deletes them batch by
    batch, then logs how many were removed. Redis errors are logged as
    warnings, not raised.
    """
    try:
        redis_client = _get_redis_client()
        pattern = f"{CACHE_PREFIX}*"
        removed = 0
        cursor = 0
        scanning = True
        while scanning:
            cursor, matched_keys = redis_client.scan(cursor, match=pattern, count=100)
            if matched_keys:
                redis_client.delete(*matched_keys)
                removed += len(matched_keys)
            # SCAN hands back cursor 0 once the iteration is complete.
            scanning = cursor != 0
        if removed:
            logger.info(f"Invalidated {removed} listing cache entries")
    except redis.RedisError as e:
        logger.warning(f"Redis cache invalidation error: {e}")
|
||||||
170
crawler/services/listing_fetcher.py
Normal file
170
crawler/services/listing_fetcher.py
Normal file
|
|
@ -0,0 +1,170 @@
|
||||||
|
"""Listing fetcher service - fetches listing data from Rightmove API."""
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
|
from listing_processor import ListingProcessor
|
||||||
|
from rec.query import create_session, listing_query
|
||||||
|
from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
|
||||||
|
from rec.throttle_detector import get_throttle_metrics, reset_throttle_metrics
|
||||||
|
from models.listing import QueryParameters
|
||||||
|
from repositories import ListingRepository
|
||||||
|
from tqdm.asyncio import tqdm
|
||||||
|
from models import Listing as modelListing
|
||||||
|
from services.query_splitter import QuerySplitter, SubQuery
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
|
||||||
|
async def dump_listings_full(
    parameters: QueryParameters,
    repository: ListingRepository,
) -> list[modelListing]:
    """Fetch and store new listings, then return them as read back from the repository.

    Args:
        parameters: Query parameters describing the search.
        repository: Repository used to store and re-read listings.

    Returns:
        The repository's listings matching ``parameters`` whose id belongs to
        a listing processed during this call.
    """
    new_listings = await dump_listings(parameters, repository)
    logger.debug(f"Upserted {len(new_listings)} new listings")

    # dump_listings returns listing objects, so testing ``x.id in new_listings``
    # compared an id against objects and never matched. Collect the ids first;
    # the getattr fallback also accepts plain ids.
    new_ids = {getattr(item, "id", item) for item in new_listings}

    # refresh listings
    listings = await repository.get_listings(parameters)  # this can be better
    return [x for x in listings if x.id in new_ids]
|
||||||
|
|
||||||
|
|
||||||
|
async def dump_listings(
    parameters: QueryParameters,
    repository: ListingRepository,
) -> list[modelListing]:
    """Fetch listings from Rightmove API and process them.

    Uses intelligent query splitting to maximize data extraction
    while respecting Rightmove's result caps.

    Steps:
        1. Split the query into subqueries and probe their result counts.
        2. Fetch the pages of every subquery, limited by a semaphore and a
           per-request delay taken from the scraper configuration.
        3. Collect listing identifiers, de-duplicate them, drop those already
           in the repository and process the rest.

    Args:
        parameters: Query parameters describing the search.
        repository: Repository used to look up and store listings.

    Returns:
        The listings that were newly processed (``None`` results from the
        processor are dropped). An empty list if the circuit breaker stopped
        the fetch.
    """
    config = ScraperConfig.from_env()
    splitter = QuerySplitter(config)

    # Reset throttle metrics at start
    reset_throttle_metrics()

    try:
        async with create_session(config) as session:
            # Phase 1 & 2: Split and probe queries
            logger.info("Splitting query and probing result counts...")
            subqueries = await splitter.split(parameters, session)

            total_estimated = splitter.calculate_total_estimated_results(subqueries)
            logger.info(
                f"Split into {len(subqueries)} subqueries, "
                f"estimated {total_estimated} total results"
            )

            # Phase 3: Fetch all pages for each subquery
            semaphore = asyncio.Semaphore(config.max_concurrent_requests)

            async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
                """Fetch all pages for a single subquery."""
                results: list[dict[str, Any]] = []

                estimated = sq.estimated_results or 0
                if estimated == 0:
                    return results

                page_size = parameters.page_size
                max_pages = min(
                    config.max_pages_per_query,
                    (estimated // page_size) + 1,
                )

                for page_id in range(1, max_pages + 1):
                    # The delay is taken while holding the semaphore, so it
                    # also spaces out the concurrent requests.
                    async with semaphore:
                        await asyncio.sleep(config.request_delay_ms / 1000)
                        try:
                            result = await listing_query(
                                page=page_id,
                                channel=parameters.listing_type,
                                min_bedrooms=sq.min_bedrooms,
                                max_bedrooms=sq.max_bedrooms,
                                radius=parameters.radius,
                                min_price=sq.min_price,
                                max_price=sq.max_price,
                                district=sq.district,
                                page_size=page_size,
                                max_days_since_added=parameters.max_days_since_added,
                                furnish_types=parameters.furnish_types or [],
                                session=session,
                                config=config,
                            )
                            results.append(result)

                            # A short page means this was the last one.
                            properties = result.get("properties", [])
                            if len(properties) < page_size:
                                break

                        except CircuitBreakerOpenError as e:
                            logger.error(f"Circuit breaker open: {e}")
                            break
                        except ThrottlingError as e:
                            logger.warning(
                                f"Throttling error on page {page_id} for {sq.district}: {e}"
                            )
                            break
                        except Exception as e:
                            # GENERIC_ERROR is treated as "paged past the end",
                            # not as a real failure.
                            if "GENERIC_ERROR" in str(e):
                                logger.debug(
                                    f"Max page for {sq.district}: {page_id - 1}"
                                )
                                break
                            logger.warning(
                                f"Error fetching page {page_id} for {sq.district}: {e}"
                            )
                            break

                return results

            # Fetch all subqueries with progress bar
            all_results = await tqdm.gather(
                *[fetch_subquery(sq) for sq in subqueries],
                desc="Fetching listings",
            )
    except CircuitBreakerOpenError as e:
        logger.error(f"Circuit breaker prevented listing fetch: {e}")
        logger.info(get_throttle_metrics().summary())
        return []
    finally:
        # Log throttle metrics at end
        metrics = get_throttle_metrics()
        if metrics.total_requests > 0:
            logger.info("\n" + metrics.summary())

    # Extract listing identifiers from results
    listing_ids: list[int] = []
    for subquery_results in all_results:
        for response_json in subquery_results:
            if not response_json:
                continue
            if response_json.get("totalAvailableResults", 0) == 0:
                continue
            for property_data in response_json.get("properties", []):
                identifier = property_data.get("identifier")
                if identifier:
                    listing_ids.append(identifier)

    logger.info(f"Found {len(listing_ids)} total listings")

    # Deduplicate, keeping first-seen order so processing order is stable.
    unique_ids = list(dict.fromkeys(listing_ids))
    logger.info(f"After deduplication: {len(unique_ids)} unique listings")

    # Filter out listings already in database. A set keeps the membership
    # test O(1) per id instead of scanning a list every time.
    stored_listings = await repository.get_listings()
    known_ids = {stored.id for stored in stored_listings}
    missing_ids = [
        listing_id for listing_id in unique_ids if listing_id not in known_ids
    ]

    listing_processor = ListingProcessor(repository)
    logger.info(f"Starting processing {len(missing_ids)} new listings")
    processed_listings = await tqdm.gather(
        *[listing_processor.process_listing(listing_id) for listing_id in missing_ids]
    )
    filtered_listings = [x for x in processed_listings if x is not None]

    return filtered_listings
|
||||||
316
crawler/services/query_splitter.py
Normal file
316
crawler/services/query_splitter.py
Normal file
|
|
@ -0,0 +1,316 @@
|
||||||
|
"""Query splitting service for handling Rightmove's result cap.
|
||||||
|
|
||||||
|
This module provides intelligent query splitting to work around Rightmove's
|
||||||
|
~1,500 listing cap per search. It adaptively splits queries by price bands
|
||||||
|
based on actual result counts.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, replace
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
|
from models.listing import ListingType, QueryParameters
|
||||||
|
from rec.districts import get_districts
|
||||||
|
from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SubQuery:
    """One subdivision of a larger search query.

    Attributes:
        district: District identifier string.
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        min_price: Minimum price in currency units.
        max_price: Maximum price in currency units.
        estimated_results: Cached result count from probing (None if not probed).
    """

    district: str
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    estimated_results: int | None = None

    @property
    def price_range(self) -> int:
        """Width of the price band, i.e. ``max_price - min_price``."""
        lower, upper = self.min_price, self.max_price
        return upper - lower
|
||||||
|
|
||||||
|
|
||||||
|
class QuerySplitter:
    """Splits large queries into smaller subqueries to avoid result caps.

    The search is first divided per district and per bedroom count. Any
    subquery whose probed result count exceeds ``config.split_threshold`` is
    then repeatedly halved on its price range until every piece is under the
    threshold or its price band cannot be narrowed any further.
    """

    def __init__(self, config: ScraperConfig | None = None) -> None:
        """Initialize the splitter with configuration.

        Args:
            config: Scraper configuration. Loaded from the environment via
                ``ScraperConfig.from_env()`` when not provided.
        """
        self.config = config or ScraperConfig.from_env()

    def create_initial_subqueries(
        self,
        parameters: QueryParameters,
        districts: dict[str, str],
    ) -> list[SubQuery]:
        """Create one subquery per (district, bedroom count) pair.

        This is the initial split, made before any result counts are probed.
        Every subquery carries the full price range of ``parameters``.

        Args:
            parameters: Original query parameters.
            districts: Dictionary of district name to location ID; only the
                keys are used here.

        Returns:
            List of initial SubQuery objects, grouped by district and ordered
            by ascending bedroom count within each district.
        """
        bedroom_counts = range(parameters.min_bedrooms, parameters.max_bedrooms + 1)
        return [
            SubQuery(
                district=district,
                min_bedrooms=num_bedrooms,
                max_bedrooms=num_bedrooms,
                min_price=parameters.min_price,
                max_price=parameters.max_price,
            )
            for district in districts
            for num_bedrooms in bedroom_counts
        ]

    async def probe_result_count(
        self,
        subquery: SubQuery,
        session: aiohttp.ClientSession,
        parameters: QueryParameters,
    ) -> int:
        """Probe the API to get the total result count for a subquery.

        Delegates to ``rec.query.probe_query`` and reads
        ``totalAvailableResults`` from its response.

        Args:
            subquery: The subquery to probe.
            session: aiohttp session for making requests.
            parameters: Original query parameters for additional settings
                (listing type, radius, recency and furnish filters).

        Returns:
            Total available results for this subquery, or 0 when the probe
            was throttled or failed for any other reason.

        Raises:
            CircuitBreakerOpenError: If the circuit breaker is open.
        """
        # Imported at call time rather than at module level - presumably to
        # avoid a circular import with rec.query; confirm before moving it.
        from rec.query import probe_query

        try:
            result = await probe_query(
                session=session,
                channel=parameters.listing_type,
                min_bedrooms=subquery.min_bedrooms,
                max_bedrooms=subquery.max_bedrooms,
                radius=parameters.radius,
                min_price=subquery.min_price,
                max_price=subquery.max_price,
                district=subquery.district,
                max_days_since_added=parameters.max_days_since_added,
                furnish_types=parameters.furnish_types or [],
                config=self.config,
            )
            return result.get("totalAvailableResults", 0)
        except CircuitBreakerOpenError:
            # Nothing more can be probed while the breaker is open, so let the
            # caller abort instead of reporting a misleading count.
            logger.error("Circuit breaker is open, stopping probe operations")
            raise
        except ThrottlingError as e:
            # NOTE(review): a throttled probe is indistinguishable from a
            # genuinely empty subquery for callers that only see the count.
            logger.warning(
                f"Throttling detected during probe for {subquery.district}: {e}"
            )
            return 0
        except Exception as e:
            # Best effort: one failed probe must not abort the whole split.
            logger.warning(f"Failed to probe subquery {subquery}: {e}")
            return 0

    def split_by_price(self, subquery: SubQuery) -> list[SubQuery]:
        """Split a subquery into two by halving the price range.

        Both halves share the midpoint price as a boundary, and both have
        their ``estimated_results`` cleared because the parent's count no
        longer applies to them.

        Args:
            subquery: The subquery to split.

        Returns:
            List of two subqueries (lower half first) that together cover the
            same price range as ``subquery``.
        """
        mid_price = (subquery.min_price + subquery.max_price) // 2
        lower_half = replace(subquery, max_price=mid_price, estimated_results=None)
        upper_half = replace(subquery, min_price=mid_price, estimated_results=None)
        return [lower_half, upper_half]

    async def _probe_throttled(
        self,
        subquery: SubQuery,
        session: aiohttp.ClientSession,
        parameters: QueryParameters,
        semaphore: asyncio.Semaphore,
    ) -> SubQuery:
        """Probe one subquery under the rate limit and record its count.

        Acquires ``semaphore``, waits ``config.request_delay_ms``, probes, and
        returns a copy of ``subquery`` with ``estimated_results`` filled in.
        """
        async with semaphore:
            await asyncio.sleep(self.config.request_delay_ms / 1000)
            count = await self.probe_result_count(subquery, session, parameters)
        return replace(subquery, estimated_results=count)

    async def adaptive_split(
        self,
        subquery: SubQuery,
        session: aiohttp.ClientSession,
        parameters: QueryParameters,
        semaphore: asyncio.Semaphore,
    ) -> list[SubQuery]:
        """Recursively halve a subquery's price band until parts fit the threshold.

        Each half is probed in turn; a half whose count still exceeds
        ``config.split_threshold`` is split again.

        Args:
            subquery: The subquery to split.
            session: aiohttp session for making requests.
            parameters: Original query parameters.
            semaphore: Semaphore bounding the number of concurrent requests.

        Returns:
            List of subqueries, ordered by ascending price band. Each is under
            the split threshold, except bands that were already at the minimum
            width and could not be narrowed further.

        Raises:
            CircuitBreakerOpenError: If the circuit breaker opens while probing.
        """
        # A band of width <= 1 cannot be halved: the midpoint equals the lower
        # bound, so the upper "half" would be the parent itself and the
        # recursion would never terminate. Enforce a floor of 1 so that a
        # min_price_band configured as 0 (or negative) cannot cause that.
        narrowest_band = max(self.config.min_price_band, 1)
        if subquery.price_range <= narrowest_band:
            logger.warning(
                f"Cannot split further, price band at minimum: {subquery}"
            )
            return [subquery]

        result: list[SubQuery] = []
        for half in self.split_by_price(subquery):
            probed = await self._probe_throttled(
                half, session, parameters, semaphore
            )
            if (probed.estimated_results or 0) > self.config.split_threshold:
                # Still too many results: keep narrowing this half.
                result.extend(
                    await self.adaptive_split(probed, session, parameters, semaphore)
                )
            else:
                result.append(probed)

        return result

    async def split(
        self,
        parameters: QueryParameters,
        session: aiohttp.ClientSession,
        on_progress: Any = None,
    ) -> list[SubQuery]:
        """Split query parameters into optimized subqueries.

        Performs the full splitting algorithm:
        1. Create initial splits by district and bedroom count
        2. Probe each to get result counts
        3. Adaptively split any that exceed the threshold

        Args:
            parameters: Original query parameters to split.
            session: aiohttp session for making requests.
            on_progress: Optional callback for progress updates; called with
                the keyword arguments ``phase`` and ``message``.

        Returns:
            List of SubQuery objects with ``estimated_results`` set, in the
            order of the initial (district, bedroom) subqueries.

        Raises:
            CircuitBreakerOpenError: If the circuit breaker opens while probing.
        """
        # Resolve the districts to scrape: the requested subset, or all of them.
        all_districts = get_districts()
        if parameters.district_names:
            districts = {
                district: locid
                for district, locid in all_districts.items()
                if district in parameters.district_names
            }
            # Surface typos or stale names instead of silently scraping less
            # than the caller asked for.
            unknown = set(parameters.district_names) - districts.keys()
            if unknown:
                logger.warning(
                    f"Ignoring unknown district names: {sorted(unknown)}"
                )
        else:
            districts = all_districts

        # Phase 1: Create initial subqueries
        initial_subqueries = self.create_initial_subqueries(parameters, districts)
        logger.info(f"Created {len(initial_subqueries)} initial subqueries")

        if on_progress:
            on_progress(
                phase="splitting",
                message=f"Created {len(initial_subqueries)} initial subqueries",
            )

        # Phase 2: Probe and adaptively split
        semaphore = asyncio.Semaphore(self.config.max_concurrent_requests)

        async def probe_and_split(sq: SubQuery) -> list[SubQuery]:
            """Probe one initial subquery and split it further if oversized."""
            probed = await self._probe_throttled(sq, session, parameters, semaphore)
            count = probed.estimated_results or 0

            if count > self.config.split_threshold:
                logger.info(
                    f"Subquery {probed.district}/{probed.min_bedrooms}BR "
                    f"has {count} results, splitting..."
                )
                return await self.adaptive_split(
                    probed, session, parameters, semaphore
                )
            return [probed]

        # All initial subqueries are probed concurrently; the semaphore caps
        # how many requests are actually in flight at once.
        results = await asyncio.gather(
            *(probe_and_split(sq) for sq in initial_subqueries)
        )
        refined_subqueries: list[SubQuery] = [
            sq for subquery_list in results for sq in subquery_list
        ]

        logger.info(
            f"Refined to {len(refined_subqueries)} subqueries after splitting"
        )

        if on_progress:
            on_progress(
                phase="splitting_complete",
                message=f"Refined to {len(refined_subqueries)} subqueries",
            )

        return refined_subqueries

    def calculate_total_estimated_results(
        self, subqueries: list[SubQuery]
    ) -> int:
        """Calculate total estimated results across all subqueries.

        Args:
            subqueries: List of subqueries; an unset (``None``)
                ``estimated_results`` counts as 0.

        Returns:
            Sum of all estimated results.
        """
        return sum(sq.estimated_results or 0 for sq in subqueries)
|
||||||
|
|
@ -1,22 +1,35 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import itertools
|
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from celery import Task
|
from celery import Task
|
||||||
from celery.schedules import crontab
|
from celery.schedules import crontab
|
||||||
from celery_app import app
|
from celery_app import app
|
||||||
from config.schedule_config import SchedulesConfig
|
from config.schedule_config import SchedulesConfig
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
from listing_processor import ListingProcessor
|
from listing_processor import ListingProcessor
|
||||||
from models.listing import Listing, QueryParameters
|
from models.listing import Listing, QueryParameters
|
||||||
from rec.districts import get_districts
|
from rec.query import create_session, listing_query
|
||||||
from rec.query import listing_query
|
from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
|
||||||
|
from rec.throttle_detector import get_throttle_metrics, reset_throttle_metrics
|
||||||
from repositories.listing_repository import ListingRepository
|
from repositories.listing_repository import ListingRepository
|
||||||
from database import engine
|
from database import engine
|
||||||
from services import image_fetcher, floorplan_detector
|
from services.query_splitter import QuerySplitter, SubQuery
|
||||||
from utils.redis_lock import redis_lock
|
from utils.redis_lock import redis_lock
|
||||||
|
from services.listing_cache import invalidate_cache
|
||||||
|
|
||||||
logger = logging.getLogger("uvicorn.error")
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
# Also configure a celery-specific logger that always outputs to stdout
|
||||||
|
celery_logger = logging.getLogger("celery.task")
|
||||||
|
if not celery_logger.handlers:
|
||||||
|
handler = logging.StreamHandler()
|
||||||
|
handler.setFormatter(logging.Formatter(
|
||||||
|
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
||||||
|
))
|
||||||
|
celery_logger.addHandler(handler)
|
||||||
|
celery_logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
SCRAPE_LOCK_NAME = "scrape_listings"
|
SCRAPE_LOCK_NAME = "scrape_listings"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -24,12 +37,18 @@ SCRAPE_LOCK_NAME = "scrape_listings"
|
||||||
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
|
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
|
||||||
with redis_lock(SCRAPE_LOCK_NAME) as acquired:
|
with redis_lock(SCRAPE_LOCK_NAME) as acquired:
|
||||||
if not acquired:
|
if not acquired:
|
||||||
logger.warning("Another scrape job is already running, skipping this execution")
|
msg = "Another scrape job is already running, skipping this execution"
|
||||||
|
logger.warning(msg)
|
||||||
|
celery_logger.warning(msg)
|
||||||
self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"})
|
self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"})
|
||||||
return {"status": "skipped", "reason": "another_job_running"}
|
return {"status": "skipped", "reason": "another_job_running"}
|
||||||
|
|
||||||
|
celery_logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
|
||||||
logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
|
logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
|
||||||
|
|
||||||
parsed_parameters = QueryParameters.model_validate_json(parameters_json)
|
parsed_parameters = QueryParameters.model_validate_json(parameters_json)
|
||||||
|
celery_logger.info(f"Starting scrape with parameters: {parsed_parameters}")
|
||||||
|
|
||||||
self.update_state(state="Starting...", meta={"progress": 0})
|
self.update_state(state="Starting...", meta={"progress": 0})
|
||||||
asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
|
asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
|
||||||
return {"progress": 0}
|
return {"progress": 0}
|
||||||
|
|
@ -51,46 +70,94 @@ async def dump_listings_full(
|
||||||
*, task: Task, parameters: QueryParameters
|
*, task: Task, parameters: QueryParameters
|
||||||
) -> list[Listing]:
|
) -> list[Listing]:
|
||||||
"""Fetches all listings, images as well as detects floorplans"""
|
"""Fetches all listings, images as well as detects floorplans"""
|
||||||
|
start_time = time.time()
|
||||||
|
celery_logger.info("=" * 60)
|
||||||
|
celery_logger.info("PHASE 1: Initializing listing fetch")
|
||||||
|
celery_logger.info("=" * 60)
|
||||||
|
|
||||||
repository = ListingRepository(engine)
|
repository = ListingRepository(engine)
|
||||||
|
|
||||||
task.update_state(state="Identifying missing listings", meta={"progress": 0})
|
task.update_state(state="Identifying missing listings", meta={"progress": 0})
|
||||||
|
celery_logger.info("Querying Rightmove API to identify new listings...")
|
||||||
ids_to_process = await get_ids_to_process(
|
ids_to_process = await get_ids_to_process(
|
||||||
parameters=parameters, repository=repository, task=task
|
parameters=parameters, repository=repository, task=task
|
||||||
)
|
)
|
||||||
|
|
||||||
|
celery_logger.info(f"Found {len(ids_to_process)} new listings to process")
|
||||||
logger.info(f"Found {len(ids_to_process)} listings to process")
|
logger.info(f"Found {len(ids_to_process)} listings to process")
|
||||||
|
|
||||||
if len(ids_to_process) == 0:
|
if len(ids_to_process) == 0:
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
celery_logger.info(f"No new listings found. Completed in {elapsed:.1f}s")
|
||||||
|
invalidate_cache()
|
||||||
task.update_state(
|
task.update_state(
|
||||||
state="No new listings found",
|
state="No new listings found",
|
||||||
meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"},
|
meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"},
|
||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
celery_logger.info("=" * 60)
|
||||||
|
celery_logger.info("PHASE 2: Processing listings (fetch details, images, OCR)")
|
||||||
|
celery_logger.info("=" * 60)
|
||||||
|
|
||||||
listing_processor = ListingProcessor(repository)
|
listing_processor = ListingProcessor(repository)
|
||||||
|
celery_logger.info(f"Starting processing {len(ids_to_process)} listings")
|
||||||
logger.info(f"Starting processing {len(ids_to_process)} listings")
|
logger.info(f"Starting processing {len(ids_to_process)} listings")
|
||||||
return await dump_listings_and_monitor(
|
|
||||||
|
result = await dump_listings_and_monitor(
|
||||||
task=task, listing_processor=listing_processor, missing_ids=ids_to_process
|
task=task, listing_processor=listing_processor, missing_ids=ids_to_process
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
celery_logger.info("=" * 60)
|
||||||
|
celery_logger.info(f"COMPLETED: Processed {len(result)} listings in {elapsed:.1f}s")
|
||||||
|
celery_logger.info("=" * 60)
|
||||||
|
|
||||||
|
invalidate_cache()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
async def dump_listings_and_monitor(
|
async def dump_listings_and_monitor(
|
||||||
*, task: Task, listing_processor: ListingProcessor, missing_ids: set[int]
|
*, task: Task, listing_processor: ListingProcessor, missing_ids: set[int]
|
||||||
) -> list[Listing]:
|
) -> list[Listing]:
|
||||||
task_progress = {missing_id: 0 for missing_id in missing_ids}
|
task_progress = {missing_id: 0 for missing_id in missing_ids}
|
||||||
|
processed_count = 0
|
||||||
|
failed_count = 0
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
async def process(missing_id: int) -> Listing | None:
|
async def process(missing_id: int) -> Listing | None:
|
||||||
|
nonlocal processed_count, failed_count
|
||||||
listing = await listing_processor.process_listing(missing_id)
|
listing = await listing_processor.process_listing(missing_id)
|
||||||
task_progress[missing_id] = 1
|
task_progress[missing_id] = 1
|
||||||
|
if listing is not None:
|
||||||
|
processed_count += 1
|
||||||
|
else:
|
||||||
|
failed_count += 1
|
||||||
return listing
|
return listing
|
||||||
|
|
||||||
async def monitor() -> None:
|
async def monitor() -> None:
|
||||||
|
last_progress = 0
|
||||||
while (progress := sum(task_progress.values())) < len(missing_ids):
|
while (progress := sum(task_progress.values())) < len(missing_ids):
|
||||||
progress_ratio = round(progress / len(missing_ids), 2)
|
progress_ratio = round(progress / len(missing_ids), 2)
|
||||||
logger.error(
|
|
||||||
f"Task progress: {progress_ratio * 100}% ({progress} out of {len(missing_ids)})"
|
# Log every 10% progress or at least every update
|
||||||
|
if progress_ratio >= last_progress + 0.1 or progress == 1:
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
rate = progress / elapsed if elapsed > 0 else 0
|
||||||
|
eta = (len(missing_ids) - progress) / rate if rate > 0 else 0
|
||||||
|
|
||||||
|
celery_logger.info(
|
||||||
|
f"Progress: {progress_ratio * 100:.0f}% "
|
||||||
|
f"({progress}/{len(missing_ids)}) "
|
||||||
|
f"| Elapsed: {elapsed:.0f}s "
|
||||||
|
f"| Rate: {rate:.1f}/s "
|
||||||
|
f"| ETA: {eta:.0f}s"
|
||||||
)
|
)
|
||||||
|
last_progress = progress_ratio
|
||||||
|
|
||||||
task.update_state(
|
task.update_state(
|
||||||
state=f"Progress: {progress_ratio * 100}% ({progress} out of {len(missing_ids)})",
|
state=f"Processing: {progress_ratio * 100:.0f}% ({progress}/{len(missing_ids)})",
|
||||||
meta={"progress": progress_ratio, "processed": progress, "total": len(missing_ids)},
|
meta={"progress": progress_ratio, "processed": progress, "total": len(missing_ids)},
|
||||||
)
|
)
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
@ -98,7 +165,11 @@ async def dump_listings_and_monitor(
|
||||||
processed_listings = await asyncio.gather(
|
processed_listings = await asyncio.gather(
|
||||||
*[process(id) for id in missing_ids], *[monitor()]
|
*[process(id) for id in missing_ids], *[monitor()]
|
||||||
)
|
)
|
||||||
filtered_listings = [l for l in processed_listings if l is not None]
|
filtered_listings = [listing for listing in processed_listings if listing is not None]
|
||||||
|
|
||||||
|
celery_logger.info(
|
||||||
|
f"Processing complete: {processed_count} successful, {failed_count} failed"
|
||||||
|
)
|
||||||
|
|
||||||
return filtered_listings
|
return filtered_listings
|
||||||
|
|
||||||
|
|
@ -134,106 +205,194 @@ async def get_ids_to_process(
|
||||||
repository: ListingRepository,
|
repository: ListingRepository,
|
||||||
task: Task,
|
task: Task,
|
||||||
) -> set[int]:
|
) -> set[int]:
|
||||||
semaphore = asyncio.Semaphore(5) # if too high, rightmove drops connections
|
"""Fetch all listing IDs using intelligent query splitting.
|
||||||
districts = await get_valid_districts_to_scrape(parameters.district_names)
|
|
||||||
task.update_state(state="Fetching listings to scrape", meta={"progress": 0})
|
|
||||||
json_responses: list[list[dict[str, Any]]] = await asyncio.gather(
|
|
||||||
*[
|
|
||||||
_fetch_listings_with_semaphore(
|
|
||||||
task=task, semaphore=semaphore, parameters=parameters, district=district
|
|
||||||
)
|
|
||||||
for district in districts.keys()
|
|
||||||
],
|
|
||||||
)
|
|
||||||
json_responses_flat = list(itertools.chain.from_iterable(json_responses))
|
|
||||||
logger.debug(f"Total listings fetched {len(json_responses_flat)}")
|
|
||||||
|
|
||||||
identifiers: set[int] = set()
|
Uses the QuerySplitter to adaptively split large queries and maximize
|
||||||
for response_json in json_responses_flat:
|
data extraction while respecting Rightmove's result caps.
|
||||||
if response_json == {}:
|
|
||||||
continue
|
|
||||||
if response_json["totalAvailableResults"] == 0:
|
|
||||||
continue
|
|
||||||
for property in response_json["properties"]:
|
|
||||||
identifier = property["identifier"]
|
|
||||||
identifiers.add(identifier)
|
|
||||||
|
|
||||||
# if listing is already in db, do not fetch details again
|
Args:
|
||||||
all_listing_ids = {l.id for l in await repository.get_listings()}
|
parameters: Query parameters for the search.
|
||||||
new_ids = identifiers - all_listing_ids
|
repository: Repository for checking existing listings.
|
||||||
return new_ids
|
task: Celery task for progress updates.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of new listing IDs that need to be processed.
|
||||||
|
"""
|
||||||
|
config = ScraperConfig.from_env()
|
||||||
|
splitter = QuerySplitter(config)
|
||||||
|
|
||||||
async def get_valid_districts_to_scrape(
|
# Reset throttle metrics
|
||||||
district_names: set[str] | None,
|
reset_throttle_metrics()
|
||||||
) -> dict[str, str]:
|
|
||||||
if district_names:
|
|
||||||
districts = {
|
|
||||||
district: locid
|
|
||||||
for district, locid in get_districts().items()
|
|
||||||
if district in district_names
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
districts = get_districts()
|
|
||||||
return districts
|
|
||||||
|
|
||||||
|
def on_progress(phase: str, message: str) -> None:
|
||||||
|
task.update_state(state=message, meta={"phase": phase})
|
||||||
|
celery_logger.info(f"[{phase}] {message}")
|
||||||
|
|
||||||
async def _fetch_listings_with_semaphore(
|
celery_logger.info("Starting query splitting and probing...")
|
||||||
*,
|
|
||||||
task: Task,
|
|
||||||
semaphore: asyncio.Semaphore,
|
|
||||||
parameters: QueryParameters,
|
|
||||||
district: str,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
result = []
|
|
||||||
# split the price in N bands to avoid the 1.5k capping by rightmove
|
|
||||||
# basically instead of 1 query with price between 1k and 5k that is capped at 1500 results
|
|
||||||
# we do 10 queries each with an increment in price range so we send more queries but each
|
|
||||||
# has a smaller chance of returning more than 1.5k results
|
|
||||||
|
|
||||||
number_of_steps = 10
|
|
||||||
price_step = parameters.max_price // number_of_steps
|
|
||||||
|
|
||||||
for step in range(number_of_steps):
|
|
||||||
task.update_state(
|
|
||||||
state=f"Fetching listings ({step} out of {number_of_steps})",
|
|
||||||
meta={"progress": step / number_of_steps},
|
|
||||||
)
|
|
||||||
min_price = step * price_step
|
|
||||||
max_price = (step + 1) * price_step
|
|
||||||
logger.debug(
|
|
||||||
f"Step {step} of {number_of_steps} with {min_price=} and {max_price=}"
|
|
||||||
)
|
|
||||||
|
|
||||||
for num_bedrooms in range(parameters.min_bedrooms, parameters.max_bedrooms + 1):
|
|
||||||
for page_id in range(
|
|
||||||
1,
|
|
||||||
3, # seems like all searches stop at 1500 entries (page_id * page_size)
|
|
||||||
):
|
|
||||||
logger.debug(f"Processing {page_id=} for {district=}")
|
|
||||||
|
|
||||||
async with semaphore:
|
|
||||||
try:
|
try:
|
||||||
listing_query_result = await listing_query(
|
async with create_session(config) as session:
|
||||||
|
# Phase 1 & 2: Split and probe queries
|
||||||
|
task.update_state(
|
||||||
|
state="Analyzing query and splitting by price bands...",
|
||||||
|
meta={"phase": "splitting", "progress": 0},
|
||||||
|
)
|
||||||
|
subqueries = await splitter.split(parameters, session, on_progress)
|
||||||
|
|
||||||
|
total_estimated = splitter.calculate_total_estimated_results(subqueries)
|
||||||
|
celery_logger.info(
|
||||||
|
f"Query split complete: {len(subqueries)} subqueries, "
|
||||||
|
f"~{total_estimated} estimated total results"
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Split into {len(subqueries)} subqueries, "
|
||||||
|
f"estimated {total_estimated} total results"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Phase 3: Fetch all pages for each subquery
|
||||||
|
task.update_state(
|
||||||
|
state=f"Fetching listings from {len(subqueries)} subqueries...",
|
||||||
|
meta={
|
||||||
|
"phase": "fetching",
|
||||||
|
"subqueries": len(subqueries),
|
||||||
|
"estimated_results": total_estimated,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
celery_logger.info(f"Fetching pages from {len(subqueries)} subqueries...")
|
||||||
|
|
||||||
|
semaphore = asyncio.Semaphore(config.max_concurrent_requests)
|
||||||
|
identifiers: set[int] = set()
|
||||||
|
completed_subqueries = 0
|
||||||
|
total_pages_fetched = 0
|
||||||
|
|
||||||
|
async def fetch_subquery(sq: SubQuery) -> list[dict[str, Any]]:
|
||||||
|
"""Fetch all pages for a single subquery."""
|
||||||
|
nonlocal completed_subqueries, total_pages_fetched
|
||||||
|
results: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
# Calculate how many pages we need based on estimated results
|
||||||
|
estimated = sq.estimated_results or 0
|
||||||
|
if estimated == 0:
|
||||||
|
completed_subqueries += 1
|
||||||
|
return results
|
||||||
|
|
||||||
|
# Fetch pages up to max_pages_per_query or until no more results
|
||||||
|
page_size = parameters.page_size
|
||||||
|
max_pages = min(
|
||||||
|
config.max_pages_per_query,
|
||||||
|
(estimated // page_size) + 1,
|
||||||
|
)
|
||||||
|
|
||||||
|
for page_id in range(1, max_pages + 1):
|
||||||
|
async with semaphore:
|
||||||
|
await asyncio.sleep(config.request_delay_ms / 1000)
|
||||||
|
try:
|
||||||
|
result = await listing_query(
|
||||||
page=page_id,
|
page=page_id,
|
||||||
channel=parameters.listing_type,
|
channel=parameters.listing_type,
|
||||||
# min_bedrooms=parameters.min_bedrooms,
|
min_bedrooms=sq.min_bedrooms,
|
||||||
# max_bedrooms=parameters.max_bedrooms,
|
max_bedrooms=sq.max_bedrooms,
|
||||||
min_bedrooms=num_bedrooms,
|
|
||||||
max_bedrooms=num_bedrooms,
|
|
||||||
radius=parameters.radius,
|
radius=parameters.radius,
|
||||||
min_price=min_price,
|
min_price=sq.min_price,
|
||||||
max_price=max_price,
|
max_price=sq.max_price,
|
||||||
district=district,
|
district=sq.district,
|
||||||
page_size=parameters.page_size,
|
page_size=page_size,
|
||||||
max_days_since_added=parameters.max_days_since_added,
|
max_days_since_added=parameters.max_days_since_added,
|
||||||
furnish_types=parameters.furnish_types or [],
|
furnish_types=parameters.furnish_types or [],
|
||||||
|
session=session,
|
||||||
|
config=config,
|
||||||
|
)
|
||||||
|
results.append(result)
|
||||||
|
total_pages_fetched += 1
|
||||||
|
|
||||||
|
# Check if we've received all results
|
||||||
|
properties = result.get("properties", [])
|
||||||
|
if len(properties) < page_size:
|
||||||
|
# No more results on next page
|
||||||
|
break
|
||||||
|
|
||||||
|
except CircuitBreakerOpenError as e:
|
||||||
|
celery_logger.error(f"Circuit breaker open: {e}")
|
||||||
|
break
|
||||||
|
except ThrottlingError as e:
|
||||||
|
celery_logger.warning(
|
||||||
|
f"Throttling on {sq.district} page {page_id}: {e}"
|
||||||
|
)
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
if "GENERIC_ERROR" in str(e):
|
||||||
|
# Reached end of results
|
||||||
|
logger.debug(
|
||||||
|
f"Max page for {sq.district}: {page_id - 1}"
|
||||||
|
)
|
||||||
|
break
|
||||||
|
logger.warning(
|
||||||
|
f"Error fetching page {page_id} for {sq.district}: {e}"
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
completed_subqueries += 1
|
||||||
|
return results
|
||||||
|
|
||||||
|
# Fetch all subqueries concurrently
|
||||||
|
all_results = await asyncio.gather(
|
||||||
|
*[fetch_subquery(sq) for sq in subqueries]
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
celery_logger.info(
|
||||||
if "GENERIC_ERROR" in str(e): # Too big page id
|
f"Fetch complete: {total_pages_fetched} pages from "
|
||||||
logger.debug(f"Max page id for {district=}: {page_id-1}")
|
f"{completed_subqueries} subqueries"
|
||||||
break
|
)
|
||||||
raise e
|
|
||||||
result.append(listing_query_result)
|
# Extract identifiers from all results
|
||||||
return result
|
for subquery_results in all_results:
|
||||||
|
for response_json in subquery_results:
|
||||||
|
if not response_json:
|
||||||
|
continue
|
||||||
|
if response_json.get("totalAvailableResults", 0) == 0:
|
||||||
|
continue
|
||||||
|
for property_data in response_json.get("properties", []):
|
||||||
|
identifier = property_data.get("identifier")
|
||||||
|
if identifier:
|
||||||
|
identifiers.add(identifier)
|
||||||
|
|
||||||
|
except CircuitBreakerOpenError as e:
|
||||||
|
celery_logger.error(f"Circuit breaker prevented query: {e}")
|
||||||
|
# Log throttle metrics
|
||||||
|
metrics = get_throttle_metrics()
|
||||||
|
if metrics.total_requests > 0:
|
||||||
|
celery_logger.info(metrics.summary())
|
||||||
|
return set()
|
||||||
|
finally:
|
||||||
|
# Log throttle metrics
|
||||||
|
metrics = get_throttle_metrics()
|
||||||
|
if metrics.total_requests > 0:
|
||||||
|
celery_logger.info(f"API Stats: {metrics.total_requests} requests, "
|
||||||
|
f"avg {metrics.average_response_time:.2f}s, "
|
||||||
|
f"{metrics.total_throttling_events} throttled")
|
||||||
|
|
||||||
|
celery_logger.info(f"Found {len(identifiers)} unique listing IDs from API")
|
||||||
|
logger.info(f"Found {len(identifiers)} unique listings")
|
||||||
|
|
||||||
|
# Filter out listings already in the database
|
||||||
|
celery_logger.info("Checking database for existing listings...")
|
||||||
|
all_listing_ids = {listing.id for listing in await repository.get_listings()}
|
||||||
|
new_ids = identifiers - all_listing_ids
|
||||||
|
|
||||||
|
celery_logger.info(
|
||||||
|
f"Filtering: {len(identifiers)} total, "
|
||||||
|
f"{len(all_listing_ids)} existing in DB, "
|
||||||
|
f"{len(new_ids)} new to process"
|
||||||
|
)
|
||||||
|
|
||||||
|
task.update_state(
|
||||||
|
state=f"Found {len(new_ids)} new listings to process",
|
||||||
|
meta={
|
||||||
|
"phase": "filtering",
|
||||||
|
"total_found": len(identifiers),
|
||||||
|
"new_listings": len(new_ids),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
return new_ids
|
||||||
|
|
|
||||||
311
crawler/tests/integration/test_throttle_integration.py
Normal file
311
crawler/tests/integration/test_throttle_integration.py
Normal file
|
|
@ -0,0 +1,311 @@
|
||||||
|
"""Integration tests for throttle detection and circuit breaker."""
|
||||||
|
import asyncio
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
from aiohttp import ClientResponse
|
||||||
|
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
|
from rec.exceptions import (
|
||||||
|
CircuitBreakerOpenError,
|
||||||
|
RateLimitError,
|
||||||
|
ServiceUnavailableError,
|
||||||
|
ThrottlingError,
|
||||||
|
)
|
||||||
|
from rec.query import (
|
||||||
|
detail_query,
|
||||||
|
listing_query,
|
||||||
|
probe_query,
|
||||||
|
get_circuit_breaker,
|
||||||
|
reset_circuit_breaker,
|
||||||
|
)
|
||||||
|
from rec.throttle_detector import reset_throttle_metrics, get_throttle_metrics
|
||||||
|
from rec.circuit_breaker import CircuitBreaker, CircuitState
|
||||||
|
from models.listing import ListingType
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def config() -> ScraperConfig:
    """Provide the ScraperConfig shared by the tests in this module."""
    # Fixed settings for the test run; the circuit breaker is enabled with a
    # failure threshold of 3 and a 0.5 s recovery timeout.
    settings = {
        "max_concurrent_requests": 5,
        "request_delay_ms": 10,
        "slow_response_threshold": 2.0,
        "enable_circuit_breaker": True,
        "circuit_breaker_failure_threshold": 3,
        "circuit_breaker_recovery_timeout": 0.5,
    }
    return ScraperConfig(**settings)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def reset_globals() -> None:
    """Reset the throttle metrics and the circuit breaker ahead of every test."""
    for reset in (reset_throttle_metrics, reset_circuit_breaker):
        reset()
|
||||||
|
|
||||||
|
|
||||||
|
class MockResponse:
|
||||||
|
"""Mock aiohttp response."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
status: int = 200,
|
||||||
|
json_data: dict | None = None,
|
||||||
|
text: str = "",
|
||||||
|
):
|
||||||
|
self.status = status
|
||||||
|
self._json_data = json_data or {}
|
||||||
|
self._text = text
|
||||||
|
|
||||||
|
async def json(self) -> dict:
|
||||||
|
return self._json_data
|
||||||
|
|
||||||
|
async def text(self) -> str:
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
async def __aenter__(self) -> "MockResponse":
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, *args: object) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestThrottlingRetryBehavior:
    """Test retry behavior for throttling errors."""

    # Query arguments shared by every probe_query call in this class.
    _PROBE_ARGS = {
        "channel": ListingType.RENT,
        "min_bedrooms": 1,
        "max_bedrooms": 2,
        "radius": 1.0,
        "min_price": 1000,
        "max_price": 2000,
        "district": "Test",
    }

    @pytest.mark.asyncio
    async def test_rate_limit_triggers_retry(self, config: ScraperConfig) -> None:
        """Two 429 responses followed by a 200 must return the 200 payload."""
        call_count = 0

        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
            nonlocal call_count
            call_count += 1
            if call_count < 3:
                return MockResponse(status=429)
            return MockResponse(
                status=200,
                json_data={"totalAvailableResults": 10, "properties": []},
            )

        mock_session = MagicMock()
        mock_session.get = mock_get

        # Stub the district lookup, and make tenacity's exponential wait
        # return 0 so the retries do not slow the test down.
        with (
            patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}),
            patch("tenacity.wait_exponential.__call__", return_value=0),
        ):
            result = await probe_query(
                session=mock_session,
                config=config,
                **self._PROBE_ARGS,
            )

        assert result["totalAvailableResults"] == 10
        assert call_count == 3

    @pytest.mark.asyncio
    async def test_service_unavailable_triggers_retry(
        self, config: ScraperConfig
    ) -> None:
        """One 503 response followed by a 200 must return the 200 payload."""
        call_count = 0

        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
            nonlocal call_count
            call_count += 1
            if call_count < 2:
                return MockResponse(status=503)
            return MockResponse(
                status=200,
                json_data={"totalAvailableResults": 5, "properties": []},
            )

        mock_session = MagicMock()
        mock_session.get = mock_get

        with (
            patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}),
            patch("tenacity.wait_exponential.__call__", return_value=0),
        ):
            result = await probe_query(
                session=mock_session,
                config=config,
                **self._PROBE_ARGS,
            )

        # Check the payload as well as the attempt count, mirroring the
        # 429 test above; previously the result was fetched but never verified.
        assert result["totalAvailableResults"] == 5
        assert call_count == 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestCircuitBreakerIntegration:
    """Verify how probe_query interacts with the circuit breaker."""

    # Query arguments reused by each probe_query call below.
    _PROBE_ARGS = {
        "channel": ListingType.RENT,
        "min_bedrooms": 1,
        "max_bedrooms": 2,
        "radius": 1.0,
        "min_price": 1000,
        "max_price": 2000,
        "district": "Test",
    }

    @pytest.mark.asyncio
    async def test_circuit_breaker_opens_after_failures(
        self, config: ScraperConfig
    ) -> None:
        """Constant 429 responses should drive the failure count to the threshold."""
        attempts = 0

        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
            nonlocal attempts
            attempts += 1
            return MockResponse(status=429)

        session = MagicMock()
        session.get = mock_get

        # Either the rate-limit error itself or the breaker-open error is
        # accepted as the failure that escapes probe_query.
        with (
            patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}),
            pytest.raises((RateLimitError, CircuitBreakerOpenError)),
            patch("tenacity.wait_exponential.__call__", return_value=0),
        ):
            await probe_query(session=session, config=config, **self._PROBE_ARGS)

        # Inspect the breaker: it must have recorded at least the
        # configured number of failures.
        breaker = get_circuit_breaker(config)
        assert breaker is not None
        assert breaker.failure_count >= config.circuit_breaker_failure_threshold

    @pytest.mark.asyncio
    async def test_circuit_breaker_blocks_requests_when_open(
        self, config: ScraperConfig
    ) -> None:
        """An already-open breaker must reject probe_query with CircuitBreakerOpenError."""
        breaker = get_circuit_breaker(config)
        assert breaker is not None

        # Trip the breaker by recording the threshold number of failures.
        remaining = config.circuit_breaker_failure_threshold
        while remaining > 0:
            breaker.record_failure()
            remaining -= 1
        assert breaker.is_open

        session = MagicMock()

        with (
            patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}),
            pytest.raises(CircuitBreakerOpenError),
        ):
            await probe_query(session=session, config=config, **self._PROBE_ARGS)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMetricsTracking:
    """Check that probe_query updates the global throttle metrics."""

    # Query arguments reused by each probe_query call below.
    _PROBE_ARGS = {
        "channel": ListingType.RENT,
        "min_bedrooms": 1,
        "max_bedrooms": 2,
        "radius": 1.0,
        "min_price": 1000,
        "max_price": 2000,
        "district": "Test",
    }

    @pytest.mark.asyncio
    async def test_metrics_tracked_on_rate_limit(self, config: ScraperConfig) -> None:
        """A session that always answers 429 must bump the rate-limit counter."""

        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
            return MockResponse(status=429)

        session = MagicMock()
        session.get = mock_get

        with (
            patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}),
            pytest.raises(RateLimitError),
            patch("tenacity.wait_exponential.__call__", return_value=0),
        ):
            await probe_query(session=session, config=config, **self._PROBE_ARGS)

        assert get_throttle_metrics().rate_limit_count > 0

    @pytest.mark.asyncio
    async def test_metrics_tracked_on_success(self, config: ScraperConfig) -> None:
        """One successful probe must count one request and no throttling events."""

        async def mock_get(*args: object, **kwargs: object) -> MockResponse:
            payload = {"totalAvailableResults": 10, "properties": []}
            return MockResponse(status=200, json_data=payload)

        session = MagicMock()
        session.get = mock_get

        with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
            await probe_query(session=session, config=config, **self._PROBE_ARGS)

        recorded = get_throttle_metrics()
        assert recorded.total_requests == 1
        assert recorded.total_throttling_events == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigIntegration:
    """Test configuration integration."""

    def test_config_from_env_includes_throttle_settings(self) -> None:
        """ScraperConfig.from_env must pick up the throttle-related variables."""
        import os

        overrides = {
            "RIGHTMOVE_SLOW_RESPONSE_THRESHOLD": "5.0",
            "RIGHTMOVE_ENABLE_CIRCUIT_BREAKER": "false",
            "RIGHTMOVE_CIRCUIT_BREAKER_FAILURES": "10",
            "RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT": "120.0",
        }

        # patch.dict restores os.environ on exit (even if from_env raises),
        # replacing the manual copy / clear / update bookkeeping.
        with patch.dict(os.environ, overrides):
            config = ScraperConfig.from_env()

        assert config.slow_response_threshold == 5.0
        assert config.enable_circuit_breaker is False
        assert config.circuit_breaker_failure_threshold == 10
        assert config.circuit_breaker_recovery_timeout == 120.0

    def test_circuit_breaker_disabled_returns_none(self) -> None:
        """get_circuit_breaker must return None when the breaker is disabled."""
        config = ScraperConfig(
            enable_circuit_breaker=False,
        )
        # Start from a clean breaker state before asking for one.
        reset_circuit_breaker()
        cb = get_circuit_breaker(config)
        assert cb is None
|
||||||
374
crawler/tests/unit/test_query_splitter.py
Normal file
374
crawler/tests/unit/test_query_splitter.py
Normal file
|
|
@ -0,0 +1,374 @@
|
||||||
|
"""Unit tests for QuerySplitter service."""
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
from config.scraper_config import ScraperConfig
|
||||||
|
from models.listing import ListingType, QueryParameters
|
||||||
|
from services.query_splitter import QuerySplitter, SubQuery
|
||||||
|
|
||||||
|
|
||||||
|
class TestScraperConfig:
    """Checks for ScraperConfig defaults and environment loading."""

    def test_default_values(self) -> None:
        """A bare ScraperConfig() must carry the expected defaults."""
        defaults = ScraperConfig()
        expected = {
            "max_concurrent_requests": 5,
            "request_delay_ms": 100,
            "result_cap": 1500,
            "split_threshold": 1200,
            "min_price_band": 100,
            "max_pages_per_query": 60,
        }
        for field_name, value in expected.items():
            assert getattr(defaults, field_name) == value
        assert defaults.proxy_url is None

    def test_from_env(self) -> None:
        """from_env must map each RIGHTMOVE_* variable onto its config field."""
        env = {
            "RIGHTMOVE_MAX_CONCURRENT": "10",
            "RIGHTMOVE_REQUEST_DELAY_MS": "200",
            "RIGHTMOVE_SPLIT_THRESHOLD": "1000",
            "RIGHTMOVE_MIN_PRICE_BAND": "50",
            "RIGHTMOVE_MAX_PAGES": "30",
            "RIGHTMOVE_PROXY_URL": "socks5://localhost:9050",
        }
        with patch.dict("os.environ", env):
            loaded = ScraperConfig.from_env()
            assert loaded.max_concurrent_requests == 10
            assert loaded.request_delay_ms == 200
            assert loaded.split_threshold == 1000
            assert loaded.min_price_band == 50
            assert loaded.max_pages_per_query == 30
            assert loaded.proxy_url == "socks5://localhost:9050"

    def test_from_env_empty_proxy(self) -> None:
        """An empty RIGHTMOVE_PROXY_URL must load as proxy_url=None."""
        # Other environment variables are left in place (clear=False).
        with patch.dict("os.environ", {"RIGHTMOVE_PROXY_URL": ""}, clear=False):
            loaded = ScraperConfig.from_env()
            assert loaded.proxy_url is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestSubQuery:
    """Checks for the SubQuery dataclass."""

    def test_price_range_calculation(self) -> None:
        """price_range for a 1000-2000 band must be 1000."""
        fields = {
            "district": "Kings Cross",
            "min_bedrooms": 2,
            "max_bedrooms": 2,
            "min_price": 1000,
            "max_price": 2000,
        }
        subquery = SubQuery(**fields)
        assert subquery.price_range == 1000
|
||||||
|
|
||||||
|
|
||||||
|
class TestQuerySplitter:
    """Checks for QuerySplitter: subquery creation, price splitting and probing."""

    @staticmethod
    def _kings_cross(min_price: int, max_price: int, **extra: object) -> SubQuery:
        """Build a 2-bedroom Kings Cross SubQuery for the given price band."""
        return SubQuery(
            district="Kings Cross",
            min_bedrooms=2,
            max_bedrooms=2,
            min_price=min_price,
            max_price=max_price,
            **extra,
        )

    @pytest.fixture
    def config(self) -> ScraperConfig:
        """Provide the scraper configuration used by the splitter under test."""
        settings = {
            "max_concurrent_requests": 5,
            "request_delay_ms": 10,  # shorter delay keeps the tests quick
            "result_cap": 1500,
            "split_threshold": 1200,
            "min_price_band": 100,
            "max_pages_per_query": 60,
            "proxy_url": None,
        }
        return ScraperConfig(**settings)

    @pytest.fixture
    def splitter(self, config: ScraperConfig) -> QuerySplitter:
        """Provide a QuerySplitter built from the test configuration."""
        return QuerySplitter(config)

    @pytest.fixture
    def parameters(self) -> QueryParameters:
        """Provide rent query parameters spanning two districts and 2-3 bedrooms."""
        return QueryParameters(
            listing_type=ListingType.RENT,
            min_bedrooms=2,
            max_bedrooms=3,
            min_price=1000,
            max_price=5000,
            district_names={"Kings Cross", "Angel"},
        )

    def test_create_initial_subqueries(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """Initial subqueries must cover every district and bedroom count."""
        districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}

        created = splitter.create_initial_subqueries(parameters, districts)

        # Two districts times two bedroom counts (2 and 3) gives four subqueries.
        assert len(created) == 4

        # The first subquery carries the full price range for one bedroom count.
        first = created[0]
        assert first.district == "Kings Cross"
        assert first.min_bedrooms == 2
        assert first.max_bedrooms == 2
        assert first.min_price == 1000
        assert first.max_price == 5000

    def test_split_by_price(self, splitter: QuerySplitter) -> None:
        """split_by_price must cut the price band in two at its midpoint."""
        whole = self._kings_cross(1000, 5000)

        halves = splitter.split_by_price(whole)

        assert len(halves) == 2
        assert halves[0].min_price == 1000
        assert halves[0].max_price == 3000  # midpoint of 1000-5000
        assert halves[1].min_price == 3000
        assert halves[1].max_price == 5000

        # District and bedroom range are inherited unchanged by both halves.
        for piece in halves:
            assert piece.district == "Kings Cross"
            assert piece.min_bedrooms == 2
            assert piece.max_bedrooms == 2

    @pytest.mark.asyncio
    async def test_probe_result_count(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """probe_result_count must return totalAvailableResults from the probe."""
        subquery = self._kings_cross(1000, 5000)
        session = AsyncMock()

        # Replace probe_query as seen by the splitter module.
        with patch("services.query_splitter.probe_query") as mock_probe:
            mock_probe.return_value = {"totalAvailableResults": 800}

            found = await splitter.probe_result_count(subquery, session, parameters)

            assert found == 800
            mock_probe.assert_called_once()

    @pytest.mark.asyncio
    async def test_probe_result_count_handles_error(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """A failing probe must be reported as a count of zero."""
        subquery = self._kings_cross(1000, 5000)
        session = AsyncMock()

        with patch("services.query_splitter.probe_query") as mock_probe:
            mock_probe.side_effect = Exception("API error")

            found = await splitter.probe_result_count(subquery, session, parameters)

            # The error is absorbed and surfaces as 0.
            assert found == 0

    @pytest.mark.asyncio
    async def test_adaptive_split_no_split_needed(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """Halves that probe under the threshold must be returned as they are."""
        subquery = self._kings_cross(1000, 2000)
        session = AsyncMock()
        semaphore = AsyncMock()

        with patch("services.query_splitter.probe_query") as mock_probe:
            # Probe answers in call order: 600 for the first half, 500 for the second.
            mock_probe.side_effect = [
                {"totalAvailableResults": 600},
                {"totalAvailableResults": 500},
            ]

            pieces = await splitter.adaptive_split(
                subquery, session, parameters, semaphore
            )

            # Both halves stay under the 1200 threshold, so two subqueries come back.
            assert len(pieces) == 2
            assert pieces[0].estimated_results == 600
            assert pieces[1].estimated_results == 500

    @pytest.mark.asyncio
    async def test_adaptive_split_recursive_splitting(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """A half that probes over the threshold must be split again."""
        subquery = self._kings_cross(1000, 5000)
        session = AsyncMock()
        semaphore = AsyncMock()

        with patch("services.query_splitter.probe_query") as mock_probe:
            # Level one: 1000-3000 reports 1300 (too many), 3000-5000 reports 800.
            # Level two, for 1000-3000: 1000-2000 reports 700, 2000-3000 reports 600.
            mock_probe.side_effect = [
                {"totalAvailableResults": 1300},  # first half, must split further
                {"totalAvailableResults": 800},  # second half, acceptable
                {"totalAvailableResults": 700},  # first quarter, acceptable
                {"totalAvailableResults": 600},  # second quarter, acceptable
            ]

            pieces = await splitter.adaptive_split(
                subquery, session, parameters, semaphore
            )

            # Expected pieces: 1000-2000 (700), 2000-3000 (600), 3000-5000 (800).
            assert len(pieces) == 3

    @pytest.mark.asyncio
    async def test_adaptive_split_respects_min_price_band(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """A band narrower than min_price_band must be returned unsplit."""
        subquery = self._kings_cross(
            1000,
            1050,  # a 50-wide band, narrower than the min_price_band of 100
            estimated_results=1500,  # above the threshold, yet not splittable
        )
        session = AsyncMock()
        semaphore = AsyncMock()

        pieces = await splitter.adaptive_split(
            subquery, session, parameters, semaphore
        )

        # The original subquery comes back untouched.
        assert len(pieces) == 1
        assert pieces[0].min_price == 1000
        assert pieces[0].max_price == 1050

    def test_calculate_total_estimated_results(
        self, splitter: QuerySplitter
    ) -> None:
        """Unprobed subqueries (estimated_results=None) must contribute nothing."""
        # (district, bedrooms, estimated_results); None marks "not probed".
        rows = [
            ("Kings Cross", 2, 500),
            ("Kings Cross", 3, 300),
            ("Angel", 2, None),
        ]
        subqueries = [
            SubQuery(
                district=district,
                min_bedrooms=bedrooms,
                max_bedrooms=bedrooms,
                min_price=1000,
                max_price=2000,
                estimated_results=estimate,
            )
            for district, bedrooms, estimate in rows
        ]

        total = splitter.calculate_total_estimated_results(subqueries)
        assert total == 800  # 500 + 300 + 0

    @pytest.mark.asyncio
    async def test_split_integration(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """Run the whole split workflow against mocked districts and probes."""
        session = AsyncMock()
        mock_districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}

        with patch("services.query_splitter.get_districts", return_value=mock_districts):
            with patch("services.query_splitter.probe_query") as mock_probe:
                # Four initial subqueries (2 districts x 2 bedroom counts) are
                # probed first, then the two halves of the oversized one.
                mock_probe.side_effect = [
                    {"totalAvailableResults": 500},  # KC 2BR, acceptable
                    {"totalAvailableResults": 1300},  # KC 3BR, must be split
                    {"totalAvailableResults": 600},  # Angel 2BR, acceptable
                    {"totalAvailableResults": 800},  # Angel 3BR, acceptable
                    {"totalAvailableResults": 700},  # KC 3BR, first half
                    {"totalAvailableResults": 600},  # KC 3BR, second half
                ]

                pieces = await splitter.split(parameters, session)

                # Five subqueries in total: KC 2BR (500), the two KC 3BR halves
                # (700 + 600), Angel 2BR (600) and Angel 3BR (800).
                assert len(pieces) == 5

                # The estimates must add up across all pieces.
                total = splitter.calculate_total_estimated_results(pieces)
                assert total == 3200  # 500 + 700 + 600 + 600 + 800

    @pytest.mark.asyncio
    async def test_split_with_on_progress_callback(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """split must report progress through the on_progress callback."""
        session = AsyncMock()
        mock_districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}
        progress_calls = []

        def on_progress(phase: str, message: str) -> None:
            progress_calls.append((phase, message))

        with patch("services.query_splitter.get_districts", return_value=mock_districts):
            with patch("services.query_splitter.probe_query") as mock_probe:
                mock_probe.return_value = {"totalAvailableResults": 500}

                await splitter.split(parameters, session, on_progress)

                # At least two updates are expected, covering both phases.
                assert len(progress_calls) >= 2
                seen_phases = {phase for phase, _ in progress_calls}
                assert "splitting" in seen_phases
                assert "splitting_complete" in seen_phases
|
||||||
334
crawler/tests/unit/test_throttle_detection.py
Normal file
334
crawler/tests/unit/test_throttle_detection.py
Normal file
|
|
@ -0,0 +1,334 @@
|
||||||
|
"""Unit tests for throttle detection and circuit breaker."""
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock, AsyncMock
|
||||||
|
import time
|
||||||
|
|
||||||
|
from rec.exceptions import (
|
||||||
|
RightmoveAPIError,
|
||||||
|
ThrottlingError,
|
||||||
|
RateLimitError,
|
||||||
|
ServiceUnavailableError,
|
||||||
|
IPBlockedError,
|
||||||
|
SlowResponseError,
|
||||||
|
UnexpectedEmptyResponseError,
|
||||||
|
InvalidResponseError,
|
||||||
|
CircuitBreakerOpenError,
|
||||||
|
)
|
||||||
|
from rec.throttle_detector import (
|
||||||
|
ThrottleMetrics,
|
||||||
|
validate_response,
|
||||||
|
get_throttle_metrics,
|
||||||
|
reset_throttle_metrics,
|
||||||
|
)
|
||||||
|
from rec.circuit_breaker import CircuitBreaker, CircuitState
|
||||||
|
|
||||||
|
|
||||||
|
class TestExceptionHierarchy:
    """Pin the inheritance relationships between the custom exceptions."""

    def test_rightmove_api_error_is_exception(self) -> None:
        """RightmoveAPIError derives from Exception."""
        base = Exception
        assert issubclass(RightmoveAPIError, base)

    def test_throttling_error_is_rightmove_api_error(self) -> None:
        """ThrottlingError derives from RightmoveAPIError."""
        base = RightmoveAPIError
        assert issubclass(ThrottlingError, base)

    def test_rate_limit_error_is_throttling_error(self) -> None:
        """RateLimitError derives from ThrottlingError."""
        base = ThrottlingError
        assert issubclass(RateLimitError, base)

    def test_service_unavailable_error_is_throttling_error(self) -> None:
        """ServiceUnavailableError derives from ThrottlingError."""
        base = ThrottlingError
        assert issubclass(ServiceUnavailableError, base)

    def test_ip_blocked_error_is_throttling_error(self) -> None:
        """IPBlockedError derives from ThrottlingError."""
        base = ThrottlingError
        assert issubclass(IPBlockedError, base)

    def test_slow_response_error_is_throttling_error(self) -> None:
        """SlowResponseError derives from ThrottlingError."""
        base = ThrottlingError
        assert issubclass(SlowResponseError, base)

    def test_unexpected_empty_response_error_is_rightmove_api_error(self) -> None:
        """UnexpectedEmptyResponseError is an API error but not a throttling error."""
        candidate = UnexpectedEmptyResponseError
        assert issubclass(candidate, RightmoveAPIError)
        assert not issubclass(candidate, ThrottlingError)

    def test_invalid_response_error_is_rightmove_api_error(self) -> None:
        """InvalidResponseError is an API error but not a throttling error."""
        candidate = InvalidResponseError
        assert issubclass(candidate, RightmoveAPIError)
        assert not issubclass(candidate, ThrottlingError)

    def test_circuit_breaker_open_error_is_rightmove_api_error(self) -> None:
        """CircuitBreakerOpenError derives from RightmoveAPIError."""
        base = RightmoveAPIError
        assert issubclass(CircuitBreakerOpenError, base)

    def test_exception_messages(self) -> None:
        """The message given to the constructor is what str() returns."""
        raised = RateLimitError("Too many requests")
        rendered = str(raised)
        assert rendered == "Too many requests"
|
||||||
|
|
||||||
|
|
||||||
|
class TestThrottleMetrics:
    """Checks for the counters and derived values of ThrottleMetrics."""

    def test_initial_state(self) -> None:
        """A new ThrottleMetrics starts with every counter at zero."""
        fresh = ThrottleMetrics()
        zeroed_counters = (
            "rate_limit_count",
            "service_unavailable_count",
            "ip_blocked_count",
            "slow_response_count",
            "empty_response_count",
            "invalid_response_count",
            "total_requests",
        )
        for counter in zeroed_counters:
            assert getattr(fresh, counter) == 0
        assert fresh.total_response_time == 0.0

    def test_record_rate_limit(self) -> None:
        """Each record_rate_limit call adds one to rate_limit_count."""
        tracker = ThrottleMetrics()
        for expected in (1, 2):
            tracker.record_rate_limit()
            assert tracker.rate_limit_count == expected

    def test_record_service_unavailable(self) -> None:
        """record_service_unavailable adds one to service_unavailable_count."""
        tracker = ThrottleMetrics()
        tracker.record_service_unavailable()
        assert tracker.service_unavailable_count == 1

    def test_record_ip_blocked(self) -> None:
        """record_ip_blocked adds one to ip_blocked_count."""
        tracker = ThrottleMetrics()
        tracker.record_ip_blocked()
        assert tracker.ip_blocked_count == 1

    def test_record_slow_response(self) -> None:
        """A slow response counts as a request and adds its duration."""
        tracker = ThrottleMetrics()
        tracker.record_slow_response(15.0)
        assert tracker.slow_response_count == 1
        assert tracker.total_response_time == 15.0
        assert tracker.total_requests == 1

    def test_record_empty_response(self) -> None:
        """record_empty_response adds one to empty_response_count."""
        tracker = ThrottleMetrics()
        tracker.record_empty_response()
        assert tracker.empty_response_count == 1

    def test_record_invalid_response(self) -> None:
        """record_invalid_response adds one to invalid_response_count."""
        tracker = ThrottleMetrics()
        tracker.record_invalid_response()
        assert tracker.invalid_response_count == 1

    def test_record_request(self) -> None:
        """record_request counts the request and adds its duration."""
        tracker = ThrottleMetrics()
        tracker.record_request(0.5)
        assert tracker.total_requests == 1
        assert tracker.total_response_time == 0.5

    def test_average_response_time(self) -> None:
        """The average of 1.0, 2.0 and 3.0 second requests is 2.0."""
        tracker = ThrottleMetrics()
        for duration in (1.0, 2.0, 3.0):
            tracker.record_request(duration)
        assert tracker.average_response_time == 2.0

    def test_average_response_time_zero_requests(self) -> None:
        """With no requests recorded the average is 0.0."""
        tracker = ThrottleMetrics()
        assert tracker.average_response_time == 0.0

    def test_total_throttling_events(self) -> None:
        """Rate-limit, unavailable, blocked and slow events all count."""
        tracker = ThrottleMetrics()
        tracker.record_rate_limit()
        tracker.record_service_unavailable()
        tracker.record_ip_blocked()
        tracker.record_slow_response(15.0)
        assert tracker.total_throttling_events == 4

    def test_throttle_rate(self) -> None:
        """One throttling event over three requests is reported as 33.33."""
        tracker = ThrottleMetrics()
        tracker.record_request(0.5)  # request 1
        tracker.record_request(0.5)  # request 2
        tracker.record_rate_limit()  # throttling event, not counted as a request
        tracker.record_request(0.5)  # request 3
        # 1 throttling event across 3 requests -> 33.33%
        assert tracker.throttle_rate == pytest.approx(33.33, rel=0.01)

    def test_throttle_rate_zero_requests(self) -> None:
        """With no requests recorded the throttle rate is 0.0."""
        tracker = ThrottleMetrics()
        assert tracker.throttle_rate == 0.0

    def test_elapsed_time(self) -> None:
        """elapsed_time covers at least the time slept since construction."""
        tracker = ThrottleMetrics()
        pause = 0.1
        time.sleep(pause)
        assert tracker.elapsed_time >= 0.1

    def test_summary(self) -> None:
        """The summary text names the request and rate-limit counters."""
        tracker = ThrottleMetrics()
        tracker.record_request(1.0)
        tracker.record_rate_limit()
        report = tracker.summary()
        for fragment in ("Total Requests:", "Rate Limit (429):", "1"):
            assert fragment in report
|
||||||
|
|
||||||
|
|
||||||
|
class TestGlobalMetrics:
    """Checks for the module-level throttle metrics accessors."""

    def test_get_throttle_metrics_singleton(self) -> None:
        """Consecutive get_throttle_metrics calls return the same object."""
        reset_throttle_metrics()
        first, second = get_throttle_metrics(), get_throttle_metrics()
        assert first is second

    def test_reset_throttle_metrics(self) -> None:
        """After a reset the rate-limit counter reads zero again."""
        reset_throttle_metrics()
        before = get_throttle_metrics()
        before.record_rate_limit()
        assert before.rate_limit_count == 1

        reset_throttle_metrics()
        after = get_throttle_metrics()
        assert after.rate_limit_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidateResponse:
    """Exercise validate_response against each kind of response it classifies."""

    def setup_method(self) -> None:
        """Reset the global throttle metrics before every test."""
        reset_throttle_metrics()

    def create_mock_response(self, status: int) -> MagicMock:
        """Return a mock response whose ``status`` attribute equals *status*."""
        return MagicMock(status=status)

    def test_rate_limit_error(self) -> None:
        """Status 429 raises RateLimitError and is counted once."""
        throttled = self.create_mock_response(429)
        with pytest.raises(RateLimitError):
            validate_response(throttled, 0.5, None, 10.0)
        counters = get_throttle_metrics()
        assert counters.rate_limit_count == 1

    def test_service_unavailable_error(self) -> None:
        """Status 503 raises ServiceUnavailableError and is counted once."""
        unavailable = self.create_mock_response(503)
        with pytest.raises(ServiceUnavailableError):
            validate_response(unavailable, 0.5, None, 10.0)
        counters = get_throttle_metrics()
        assert counters.service_unavailable_count == 1

    def test_ip_blocked_error(self) -> None:
        """Status 403 raises IPBlockedError and is counted once."""
        forbidden = self.create_mock_response(403)
        with pytest.raises(IPBlockedError):
            validate_response(forbidden, 0.5, None, 10.0)
        counters = get_throttle_metrics()
        assert counters.ip_blocked_count == 1

    def test_slow_response_error(self) -> None:
        """A 15.0 s response against a 10.0 s threshold raises SlowResponseError."""
        ok_response = self.create_mock_response(200)
        payload = {"totalAvailableResults": 0, "properties": []}
        with pytest.raises(SlowResponseError):
            validate_response(ok_response, 15.0, payload, 10.0)
        counters = get_throttle_metrics()
        assert counters.slow_response_count == 1

    def test_slow_response_just_under_threshold(self) -> None:
        """A 9.9 s response against a 10.0 s threshold is accepted."""
        ok_response = self.create_mock_response(200)
        payload = {"totalAvailableResults": 0, "properties": []}

        # No exception is expected here.
        validate_response(ok_response, 9.9, payload, 10.0)

        counters = get_throttle_metrics()
        assert counters.slow_response_count == 0

    def test_error_in_response_body(self) -> None:
        """A body with an "error" key raises InvalidResponseError and is counted."""
        ok_response = self.create_mock_response(200)
        payload = {"error": "Something went wrong"}
        with pytest.raises(InvalidResponseError):
            validate_response(ok_response, 0.5, payload, 10.0)
        counters = get_throttle_metrics()
        assert counters.invalid_response_count == 1

    def test_generic_error_in_body(self) -> None:
        """A body mentioning GENERIC_ERROR raises InvalidResponseError."""
        ok_response = self.create_mock_response(200)
        payload = {"message": "GENERIC_ERROR occurred"}
        with pytest.raises(InvalidResponseError):
            validate_response(ok_response, 0.5, payload, 10.0)

    def test_unexpected_empty_response(self) -> None:
        """No properties despite 100 available results raises when data is expected."""
        ok_response = self.create_mock_response(200)
        payload = {"totalAvailableResults": 100, "properties": []}
        with pytest.raises(UnexpectedEmptyResponseError):
            validate_response(ok_response, 0.5, payload, 10.0, expect_data=True)
        counters = get_throttle_metrics()
        assert counters.empty_response_count == 1

    def test_empty_response_when_not_expecting_data(self) -> None:
        """The same empty payload is accepted when expect_data is False."""
        ok_response = self.create_mock_response(200)
        payload = {"totalAvailableResults": 100, "properties": []}

        # With expect_data=False no exception is expected.
        validate_response(ok_response, 0.5, payload, 10.0, expect_data=False)

        counters = get_throttle_metrics()
        assert counters.empty_response_count == 0

    def test_valid_response(self) -> None:
        """A fast, populated 200 response counts as one request, zero throttles."""
        ok_response = self.create_mock_response(200)
        listings = [{"id": 1}, {"id": 2}]
        payload = {
            "totalAvailableResults": 10,
            "properties": listings,
        }

        validate_response(ok_response, 0.5, payload, 10.0, expect_data=True)

        counters = get_throttle_metrics()
        assert counters.total_requests == 1
        assert counters.total_throttling_events == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestCircuitBreaker:
    """State-transition tests for the CircuitBreaker class."""

    def test_initial_state_is_closed(self) -> None:
        """A newly built breaker reports CLOSED and nothing else."""
        breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
        assert breaker.state == CircuitState.CLOSED
        assert breaker.is_closed
        assert not breaker.is_open
        assert not breaker.is_half_open

    def test_allows_requests_when_closed(self) -> None:
        """call() on a closed breaker completes without raising."""
        breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
        breaker.call()  # must not raise

    def test_opens_after_threshold_failures(self) -> None:
        """The breaker stays closed below the threshold and opens on reaching it."""
        breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
        for _ in range(2):
            breaker.record_failure()
        assert breaker.is_closed

        breaker.record_failure()
        assert breaker.is_open

    def test_blocks_requests_when_open(self) -> None:
        """call() on an open breaker raises CircuitBreakerOpenError."""
        breaker = CircuitBreaker(failure_threshold=1, recovery_timeout=60.0)
        breaker.record_failure()
        assert breaker.is_open

        with pytest.raises(CircuitBreakerOpenError):
            breaker.call()

    def test_success_resets_failure_count(self) -> None:
        """Recording a success brings the failure count back to zero."""
        breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)
        for _ in range(2):
            breaker.record_failure()
        assert breaker.failure_count == 2

        breaker.record_success()
        assert breaker.failure_count == 0

    def test_transitions_to_half_open_after_timeout(self) -> None:
        """Once the recovery timeout has passed, call() moves to half-open."""
        breaker = CircuitBreaker(failure_threshold=1, recovery_timeout=0.1)
        breaker.record_failure()
        assert breaker.is_open

        time.sleep(0.15)  # wait past the 0.1 s recovery timeout
        breaker.call()
        assert breaker.is_half_open

    def test_half_open_success_closes_circuit(self) -> None:
        """A success recorded while half-open closes the breaker."""
        breaker = CircuitBreaker(failure_threshold=1, recovery_timeout=0.1)
        breaker.record_failure()

        time.sleep(0.15)  # wait past the 0.1 s recovery timeout
        breaker.call()  # moves the breaker to half-open
        assert breaker.is_half_open

        breaker.record_success()
        assert breaker.is_closed

    def test_half_open_failure_reopens_circuit(self) -> None:
        """A failure recorded while half-open opens the breaker again."""
        breaker = CircuitBreaker(failure_threshold=1, recovery_timeout=0.1)
        breaker.record_failure()

        time.sleep(0.15)  # wait past the 0.1 s recovery timeout
        breaker.call()  # moves the breaker to half-open
        assert breaker.is_half_open

        breaker.record_failure()
        assert breaker.is_open

    def test_reset(self) -> None:
        """reset() returns an open breaker to closed with zero failures."""
        breaker = CircuitBreaker(failure_threshold=1, recovery_timeout=60.0)
        breaker.record_failure()
        assert breaker.is_open

        breaker.reset()
        assert breaker.is_closed
        assert breaker.failure_count == 0
|
||||||
Loading…
Add table
Add a link
Reference in a new issue