Add intelligent query splitting to maximize Rightmove data extraction
This commit is contained in:
parent
29ba739063
commit
e8293c6042
11 changed files with 1970 additions and 113 deletions
|
|
@ -7,6 +7,15 @@ export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLit
|
|||
export CELERY_BROKER_URL="redis://localhost:6379/0" # Celery broker URL, used for processing background tasks
|
||||
export CELERY_RESULT_BACKEND="redis://localhost:6379/1" # Celery result backend URL; same Redis host as the broker, different database index (/1 vs /0)
|
||||
|
||||
# Rightmove scraper configuration
|
||||
# These settings control query splitting to work around Rightmove's ~1500 result cap
|
||||
export RIGHTMOVE_MAX_CONCURRENT=5 # Max concurrent HTTP requests
|
||||
export RIGHTMOVE_REQUEST_DELAY_MS=100 # Delay between requests in milliseconds
|
||||
export RIGHTMOVE_SPLIT_THRESHOLD=1200 # Split query when results exceed this threshold
|
||||
export RIGHTMOVE_MIN_PRICE_BAND=100 # Minimum price band width (won't split below this)
|
||||
export RIGHTMOVE_MAX_PAGES=60 # Max pages per subquery (60 * 25 = 1500 max results)
|
||||
export RIGHTMOVE_PROXY_URL="" # Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor); empty means no proxy URL is set
|
||||
|
||||
# Periodic scraping schedules (JSON array)
|
||||
# Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types
|
||||
# Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue