diff --git a/config/scraper_config.py b/config/scraper_config.py index 2b2232c..bd3f8f2 100644 --- a/config/scraper_config.py +++ b/config/scraper_config.py @@ -23,6 +23,7 @@ class ScraperConfig: max_pages_per_query: Maximum pages to fetch per subquery (60 * 25 = 1500). proxy_url: Optional SOCKS proxy URL (e.g., socks5://localhost:9050 for Tor). slow_response_threshold: Response time threshold in seconds for throttle detection. + request_timeout: Total timeout per HTTP request in seconds. enable_circuit_breaker: Whether to enable circuit breaker protection. circuit_breaker_failure_threshold: Number of consecutive failures to open circuit. circuit_breaker_recovery_timeout: Seconds to wait before testing recovery. @@ -36,6 +37,7 @@ class ScraperConfig: max_pages_per_query: int = 60 # 60 * 25 = 1500 results max proxy_url: str | None = None slow_response_threshold: float = 10.0 # seconds + request_timeout: float = 30.0 # seconds enable_circuit_breaker: bool = True circuit_breaker_failure_threshold: int = 5 circuit_breaker_recovery_timeout: float = 60.0 @@ -53,6 +55,7 @@ class ScraperConfig: RIGHTMOVE_MAX_PAGES: Max pages per query (default: 60) RIGHTMOVE_PROXY_URL: SOCKS proxy URL (default: None) RIGHTMOVE_SLOW_RESPONSE_THRESHOLD: Slow response threshold in seconds (default: 10.0) + RIGHTMOVE_REQUEST_TIMEOUT: Total timeout per HTTP request in seconds (default: 30.0) RIGHTMOVE_ENABLE_CIRCUIT_BREAKER: Enable circuit breaker (default: True) RIGHTMOVE_CIRCUIT_BREAKER_FAILURES: Failures to open circuit (default: 5) RIGHTMOVE_CIRCUIT_BREAKER_TIMEOUT: Recovery timeout in seconds (default: 60.0) @@ -81,6 +84,9 @@ class ScraperConfig: slow_response_threshold=float( os.environ.get("RIGHTMOVE_SLOW_RESPONSE_THRESHOLD", "10.0") ), + request_timeout=float( + os.environ.get("RIGHTMOVE_REQUEST_TIMEOUT", "30.0") + ), enable_circuit_breaker=os.environ.get( "RIGHTMOVE_ENABLE_CIRCUIT_BREAKER", "true" ).lower() in ("true", "1", "yes"), diff --git a/rec/query.py b/rec/query.py index 805fdab..8c092bb 100644 --- a/rec/query.py +++ b/rec/query.py @@ -88,6 +88,7 @@ async def create_session( trust_env=True, connector=connector, headers=DEFAULT_HEADERS, + timeout=aiohttp.ClientTimeout(total=config.request_timeout), ) try: yield session @@ -307,12 +308,15 @@ async def _execute_api_request( if session: return await do_request(session) else: - async with aiohttp.ClientSession(trust_env=True) as new_session: + async with aiohttp.ClientSession( + trust_env=True, + timeout=aiohttp.ClientTimeout(total=config.request_timeout), + ) as new_session: return await do_request(new_session) @retry( - retry=retry_if_exception_type(ThrottlingError), + retry=retry_if_exception_type((ThrottlingError, TimeoutError)), wait=wait_exponential(multiplier=2, min=2, max=120), stop=stop_after_attempt(5), ) @@ -356,7 +360,7 @@ async def detail_query( @retry( - retry=retry_if_exception_type(ThrottlingError), + retry=retry_if_exception_type((ThrottlingError, TimeoutError)), wait=wait_exponential(multiplier=2, min=2, max=120), stop=stop_after_attempt(5), ) @@ -438,7 +442,7 @@ async def listing_query( @retry( - retry=retry_if_exception_type(ThrottlingError), + retry=retry_if_exception_type((ThrottlingError, TimeoutError)), wait=wait_exponential(multiplier=2, min=2, max=60), stop=stop_after_attempt(5), )