Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
This commit is contained in:
parent
e2247be700
commit
eafbc1ac52
221 changed files with 70 additions and 146140 deletions
43
.env.sample
Normal file
43
.env.sample
Normal file
|
|
@@ -0,0 +1,43 @@
|
|||
# Copy me to .env and source me (every variable is exported so child processes inherit it)

export ROUTING_API_KEY="<CHANGE ME>" # fetch from https://console.cloud.google.com/google/maps-apis/; prices - https://developers.google.com/maps/billing-and-pricing/pricing

# export DB_CONNECTION_STRING="mysql://wrongmove:wrongmove@localhost:3306/wrongmove" # example for mysql
export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLite locally
export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks
export CELERY_RESULT_BACKEND="redis://localhost:6379/1"

# Rightmove scraper configuration
# These settings control query splitting to work around Rightmove's ~1500 result cap
export RIGHTMOVE_MAX_CONCURRENT=5          # Max concurrent HTTP requests
export RIGHTMOVE_REQUEST_DELAY_MS=100      # Delay between requests in milliseconds
export RIGHTMOVE_SPLIT_THRESHOLD=1200      # Split query when results exceed this threshold
export RIGHTMOVE_MIN_PRICE_BAND=100        # Minimum price band width (won't split below this)
export RIGHTMOVE_MAX_PAGES=60              # Max pages per subquery (60 * 25 = 1500 max results)
export RIGHTMOVE_PROXY_URL=                # Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor)

# Throttling detection and circuit breaker
export RIGHTMOVE_SLOW_RESPONSE_THRESHOLD=10.0   # Response time threshold in seconds
export RIGHTMOVE_ENABLE_CIRCUIT_BREAKER=true    # Enable circuit breaker protection
export RIGHTMOVE_CIRCUIT_BREAKER_FAILURES=5     # Consecutive failures to open circuit
export RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT=60.0   # Seconds to wait before recovery attempt

# Periodic scraping schedules (JSON array)
# Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types
# Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday)
# Example:
# export SCRAPE_SCHEDULES='[{"name":"Daily RENT","listing_type":"RENT","hour":"2","min_bedrooms":2,"max_bedrooms":3,"min_price":2000,"max_price":4000}]'
# Multiple schedules:
# export SCRAPE_SCHEDULES='[{"name":"RENT 2am","listing_type":"RENT","hour":"2"},{"name":"BUY 4am","listing_type":"BUY","hour":"4"}]'
export SCRAPE_SCHEDULES=

# WebAuthn / Passkey configuration
export WEBAUTHN_RP_ID=localhost               # Relying Party ID (domain)
export WEBAUTHN_RP_NAME=Wrongmove             # Relying Party display name
export WEBAUTHN_ORIGIN=https://localhost      # Expected WebAuthn origin

# JWT configuration (for passkey-issued tokens)
export JWT_SECRET=change-me-in-production     # HMAC secret for HS256 signing
export JWT_ALGORITHM=HS256                    # JWT signing algorithm
export JWT_EXPIRATION_HOURS=24                # Token expiry in hours
export JWT_ISSUER=wrongmove                   # JWT issuer claim
Loading…
Add table
Add a link
Reference in a new issue