Add configurable request timeout and retry on TimeoutError
Requests to the Rightmove API previously had no explicit timeout, so hung connections could block workers indefinitely. Add a configurable request_timeout (default 30s) to ScraperConfig and apply it to all aiohttp sessions. Also retry on TimeoutError, in addition to ThrottlingError, in all API query functions.
This commit is contained in:
parent
7a1042741e
commit
578b97b0c5
2 changed files with 14 additions and 4 deletions
|
|
@ -23,6 +23,7 @@ class ScraperConfig:
|
|||
max_pages_per_query: Maximum pages to fetch per subquery (60 * 25 = 1500).
|
||||
proxy_url: Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor).
|
||||
slow_response_threshold: Response time threshold in seconds for throttle detection.
|
||||
request_timeout: Total timeout per HTTP request in seconds.
|
||||
enable_circuit_breaker: Whether to enable circuit breaker protection.
|
||||
circuit_breaker_failure_threshold: Number of consecutive failures to open circuit.
|
||||
circuit_breaker_recovery_timeout: Seconds to wait before testing recovery.
|
||||
|
|
@ -36,6 +37,7 @@ class ScraperConfig:
|
|||
max_pages_per_query: int = 60 # 60 * 25 = 1500 results max
|
||||
proxy_url: str | None = None
|
||||
slow_response_threshold: float = 10.0 # seconds
|
||||
request_timeout: float = 30.0 # seconds
|
||||
enable_circuit_breaker: bool = True
|
||||
circuit_breaker_failure_threshold: int = 5
|
||||
circuit_breaker_recovery_timeout: float = 60.0
|
||||
|
|
@ -53,6 +55,7 @@ class ScraperConfig:
|
|||
RIGHTMOVE_MAX_PAGES: Max pages per query (default: 60)
|
||||
RIGHTMOVE_PROXY_URL: SOCKS proxy URL (default: None)
|
||||
RIGHTMOVE_SLOW_RESPONSE_THRESHOLD: Slow response threshold in seconds (default: 10.0)
|
||||
RIGHTMOVE_REQUEST_TIMEOUT: Total timeout per HTTP request in seconds (default: 30.0)
|
||||
RIGHTMOVE_ENABLE_CIRCUIT_BREAKER: Enable circuit breaker (default: True)
|
||||
RIGHTMOVE_CIRCUIT_BREAKER_FAILURES: Failures to open circuit (default: 5)
|
||||
RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT: Recovery timeout in seconds (default: 60.0)
|
||||
|
|
@ -81,6 +84,9 @@ class ScraperConfig:
|
|||
slow_response_threshold=float(
|
||||
os.environ.get("RIGHTMOVE_SLOW_RESPONSE_THRESHOLD", "10.0")
|
||||
),
|
||||
request_timeout=float(
|
||||
os.environ.get("RIGHTMOVE_REQUEST_TIMEOUT", "30.0")
|
||||
),
|
||||
enable_circuit_breaker=os.environ.get(
|
||||
"RIGHTMOVE_ENABLE_CIRCUIT_BREAKER", "true"
|
||||
).lower() in ("true", "1", "yes"),
|
||||
|
|
|
|||
12
rec/query.py
12
rec/query.py
|
|
@ -88,6 +88,7 @@ async def create_session(
|
|||
trust_env=True,
|
||||
connector=connector,
|
||||
headers=DEFAULT_HEADERS,
|
||||
timeout=aiohttp.ClientTimeout(total=config.request_timeout),
|
||||
)
|
||||
try:
|
||||
yield session
|
||||
|
|
@ -307,12 +308,15 @@ async def _execute_api_request(
|
|||
if session:
|
||||
return await do_request(session)
|
||||
else:
|
||||
async with aiohttp.ClientSession(trust_env=True) as new_session:
|
||||
async with aiohttp.ClientSession(
|
||||
trust_env=True,
|
||||
timeout=aiohttp.ClientTimeout(total=config.request_timeout),
|
||||
) as new_session:
|
||||
return await do_request(new_session)
|
||||
|
||||
|
||||
@retry(
|
||||
retry=retry_if_exception_type(ThrottlingError),
|
||||
retry=retry_if_exception_type((ThrottlingError, TimeoutError)),
|
||||
wait=wait_exponential(multiplier=2, min=2, max=120),
|
||||
stop=stop_after_attempt(5),
|
||||
)
|
||||
|
|
@ -356,7 +360,7 @@ async def detail_query(
|
|||
|
||||
|
||||
@retry(
|
||||
retry=retry_if_exception_type(ThrottlingError),
|
||||
retry=retry_if_exception_type((ThrottlingError, TimeoutError)),
|
||||
wait=wait_exponential(multiplier=2, min=2, max=120),
|
||||
stop=stop_after_attempt(5),
|
||||
)
|
||||
|
|
@ -438,7 +442,7 @@ async def listing_query(
|
|||
|
||||
|
||||
@retry(
|
||||
retry=retry_if_exception_type(ThrottlingError),
|
||||
retry=retry_if_exception_type((ThrottlingError, TimeoutError)),
|
||||
wait=wait_exponential(multiplier=2, min=2, max=60),
|
||||
stop=stop_after_attempt(5),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue