Add throttling detection and circuit breaker for Rightmove scraper
This commit is contained in:
parent
e8293c6042
commit
f880664a98
10 changed files with 1428 additions and 86 deletions
137
crawler/rec/circuit_breaker.py
Normal file
137
crawler/rec/circuit_breaker.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""Circuit breaker pattern for protecting against cascading failures."""
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from rec.exceptions import CircuitBreakerOpenError
|
||||
|
||||
logger = logging.getLogger("uvicorn.error")
|
||||
|
||||
|
||||
class CircuitState(enum.Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation
    CLOSED = "closed"
    # Too many failures, blocking requests
    OPEN = "open"
    # Testing if service recovered
    HALF_OPEN = "half_open"
|
||||
|
||||
|
||||
@dataclass
class CircuitBreaker:
    """Three-state circuit breaker guarding calls to an unreliable service.

    Behaviour per state, as implemented by the methods below:

    - CLOSED: ``call()`` never raises. ``record_failure()`` counts failures
      and opens the circuit once ``failure_count`` reaches
      ``failure_threshold``; ``record_success()`` zeroes the count.
    - OPEN: ``call()`` raises ``CircuitBreakerOpenError`` until
      ``recovery_timeout`` seconds have elapsed since ``last_failure_time``,
      then switches to HALF_OPEN and lets the caller through.
    - HALF_OPEN: ``call()`` does not raise. The next recorded success closes
      the circuit; the next recorded failure reopens it.

    NOTE(review): nothing here limits how many callers pass ``call()`` while
    the breaker is HALF_OPEN, so concurrent callers all get through until one
    of them records a result.

    Attributes:
        failure_threshold: Number of consecutive failures before opening.
        recovery_timeout: Seconds to wait before attempting half-open state.
        state: Current circuit state.
        failure_count: Count of consecutive failures.
        last_failure_time: ``time.time()`` value of the last recorded failure.
        last_state_change: ``time.time()`` value of the last state change.
    """

    failure_threshold: int
    recovery_timeout: float
    state: CircuitState = CircuitState.CLOSED
    failure_count: int = 0
    last_failure_time: float = 0.0
    last_state_change: float = 0.0

    def __post_init__(self) -> None:
        """Stamp the creation time as the first state change."""
        self.last_state_change = time.time()

    def call(self) -> None:
        """Gate a request: return silently if it may proceed.

        Raises:
            CircuitBreakerOpenError: If the circuit is open and the recovery
                timeout has not yet elapsed.
        """
        now = time.time()

        # CLOSED and HALF_OPEN both let the request proceed.
        if self.state != CircuitState.OPEN:
            return

        since_failure = now - self.last_failure_time
        if since_failure >= self.recovery_timeout:
            # Cool-down is over: let this caller through as a trial.
            self._transition_to_half_open()
            return

        raise CircuitBreakerOpenError(
            f"Circuit breaker is open. "
            f"Waiting {self.recovery_timeout - since_failure:.1f}s "
            f"before retry."
        )

    def record_success(self) -> None:
        """Note a successful request, closing the circuit if it was half-open."""
        if self.is_half_open:
            # The trial request worked, so the service is considered healthy.
            self._transition_to_closed()

        # Any success breaks the run of consecutive failures.
        self.failure_count = 0

    def record_failure(self) -> None:
        """Note a failed request and open the circuit when warranted."""
        self.failure_count += 1
        self.last_failure_time = time.time()

        trial_failed = self.state == CircuitState.HALF_OPEN
        threshold_reached = (
            self.state == CircuitState.CLOSED
            and self.failure_count >= self.failure_threshold
        )
        if trial_failed or threshold_reached:
            self._transition_to_open()

    def _enter(self, new_state: CircuitState) -> None:
        """Switch to ``new_state`` and stamp the time of the change."""
        self.state = new_state
        self.last_state_change = time.time()

    def _transition_to_open(self) -> None:
        """Move to OPEN and log a warning."""
        self._enter(CircuitState.OPEN)
        logger.warning(
            f"Circuit breaker OPENED after {self.failure_count} consecutive failures. "
            f"Will retry in {self.recovery_timeout}s"
        )

    def _transition_to_half_open(self) -> None:
        """Move to HALF_OPEN and log it."""
        self._enter(CircuitState.HALF_OPEN)
        logger.info("Circuit breaker entering HALF_OPEN state, testing service recovery")

    def _transition_to_closed(self) -> None:
        """Move to CLOSED, clear the failure count and log it."""
        self._enter(CircuitState.CLOSED)
        self.failure_count = 0
        logger.info("Circuit breaker CLOSED, service recovered")

    def reset(self) -> None:
        """Force the breaker back to a pristine CLOSED state."""
        self.failure_count = 0
        self.last_failure_time = 0.0
        self._enter(CircuitState.CLOSED)
        logger.info("Circuit breaker manually reset to CLOSED state")

    @property
    def is_open(self) -> bool:
        """True while the circuit is OPEN."""
        return self.state == CircuitState.OPEN

    @property
    def is_closed(self) -> bool:
        """True while the circuit is CLOSED."""
        return self.state == CircuitState.CLOSED

    @property
    def is_half_open(self) -> bool:
        """True while the circuit is HALF_OPEN."""
        return self.state == CircuitState.HALF_OPEN
|
||||
74
crawler/rec/exceptions.py
Normal file
74
crawler/rec/exceptions.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
"""Custom exceptions for Rightmove API errors."""
|
||||
|
||||
|
||||
class RightmoveAPIError(Exception):
    """Root of the Rightmove API exception hierarchy."""
|
||||
|
||||
|
||||
class ThrottlingError(RightmoveAPIError):
    """Common parent for every throttling-related error.

    Raised when Rightmove appears to be limiting our requests, meaning the
    caller should back off.
    """
|
||||
|
||||
|
||||
class RateLimitError(ThrottlingError):
    """Rightmove answered with HTTP 429 (Too Many Requests).

    The API is explicitly rate limiting our requests.
    """
|
||||
|
||||
|
||||
class ServiceUnavailableError(ThrottlingError):
    """Rightmove answered with HTTP 503 (Service Unavailable).

    The service is temporarily unavailable, possibly because it is overloaded.
    """
|
||||
|
||||
|
||||
class IPBlockedError(ThrottlingError):
    """Rightmove answered with HTTP 403 (Forbidden).

    Our IP address may have been blocked or blacklisted.
    """
|
||||
|
||||
|
||||
class SlowResponseError(ThrottlingError):
    """A response took longer than the configured threshold.

    Very slow answers are treated as a sign of potential throttling or
    overload on the API side.
    """
|
||||
|
||||
|
||||
class UnexpectedEmptyResponseError(RightmoveAPIError):
    """The response carried no data although data was expected."""
|
||||
|
||||
|
||||
class InvalidResponseError(RightmoveAPIError):
    """The response body holds error messages or otherwise invalid data."""
|
||||
|
||||
|
||||
class CircuitBreakerOpenError(RightmoveAPIError):
    """Raised when the circuit breaker is open and blocking requests.

    Too many failures were detected, so further requests are refused to give
    the service time to recover.
    """
|
||||
|
|
@ -1,4 +1,6 @@
|
|||
import enum
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
from contextlib import asynccontextmanager
|
||||
from collections.abc import AsyncIterator
|
||||
|
|
@ -6,9 +8,26 @@ from collections.abc import AsyncIterator
|
|||
import aiohttp
|
||||
from models.listing import FurnishType, ListingType
|
||||
from rec import districts
|
||||
from tenacity import retry, stop_after_attempt, wait_random
|
||||
from rec.exceptions import (
|
||||
CircuitBreakerOpenError,
|
||||
ThrottlingError,
|
||||
)
|
||||
from rec.throttle_detector import get_throttle_metrics, validate_response
|
||||
from rec.circuit_breaker import CircuitBreaker
|
||||
from tenacity import (
|
||||
retry,
|
||||
retry_if_exception_type,
|
||||
stop_after_attempt,
|
||||
wait_exponential,
|
||||
wait_random,
|
||||
)
|
||||
from config.scraper_config import ScraperConfig
|
||||
|
||||
logger = logging.getLogger("uvicorn.error")
|
||||
|
||||
# Global circuit breaker instance
|
||||
_circuit_breaker: CircuitBreaker | None = None
|
||||
|
||||
|
||||
DEFAULT_HEADERS = {
|
||||
"Host": "api.rightmove.co.uk",
|
||||
|
|
@ -65,20 +84,81 @@ async def create_session(
|
|||
await session.close()
|
||||
|
||||
|
||||
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
|
||||
def get_circuit_breaker(config: ScraperConfig | None = None) -> CircuitBreaker | None:
    """Return the process-wide circuit breaker, creating it on first use.

    The breaker is built once from the configuration in effect at that first
    call; later calls return the same instance whatever ``config`` they pass.

    Args:
        config: Configuration used to decide whether the breaker is enabled
            and, on first use, to initialise it. Loaded from the environment
            when omitted.

    Returns:
        The shared CircuitBreaker instance if enabled, None otherwise.
    """
    global _circuit_breaker

    settings = config if config is not None else ScraperConfig.from_env()
    if not settings.enable_circuit_breaker:
        return None

    if _circuit_breaker is not None:
        return _circuit_breaker

    _circuit_breaker = CircuitBreaker(
        failure_threshold=settings.circuit_breaker_failure_threshold,
        recovery_timeout=settings.circuit_breaker_recovery_timeout,
    )
    return _circuit_breaker
|
||||
|
||||
|
||||
def reset_circuit_breaker() -> None:
    """Reset the shared circuit breaker, if one has been created."""
    breaker = _circuit_breaker
    if breaker is None:
        # Nothing has been created yet, so there is nothing to reset.
        return
    breaker.reset()
|
||||
|
||||
|
||||
def check_circuit_breaker(config: ScraperConfig | None = None) -> None:
    """Ask the shared circuit breaker whether a request may go ahead.

    Does nothing when the circuit breaker is disabled.

    Args:
        config: Configuration for the circuit breaker.

    Raises:
        CircuitBreakerOpenError: If the circuit is open.
    """
    breaker = get_circuit_breaker(config)
    if breaker is None:
        return
    breaker.call()
|
||||
|
||||
|
||||
@retry(
|
||||
retry=retry_if_exception_type(ThrottlingError),
|
||||
wait=wait_exponential(multiplier=2, min=2, max=120),
|
||||
stop=stop_after_attempt(5),
|
||||
)
|
||||
async def detail_query(
|
||||
detail_id: int,
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
config: ScraperConfig | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Fetch detailed property information.
|
||||
|
||||
Args:
|
||||
detail_id: The property identifier.
|
||||
session: Optional aiohttp session. Creates new one if not provided.
|
||||
config: Scraper configuration. Loads from environment if not provided.
|
||||
|
||||
Returns:
|
||||
Property details as a dictionary.
|
||||
|
||||
Raises:
|
||||
CircuitBreakerOpenError: If the circuit breaker is open.
|
||||
ThrottlingError: If the request is throttled.
|
||||
"""
|
||||
if config is None:
|
||||
config = ScraperConfig.from_env()
|
||||
|
||||
check_circuit_breaker(config)
|
||||
cb = get_circuit_breaker(config)
|
||||
|
||||
params = {
|
||||
"apiApplication": "ANDROID",
|
||||
"appVersion": "3.70.0",
|
||||
|
|
@ -86,13 +166,38 @@ async def detail_query(
|
|||
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
||||
|
||||
async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
|
||||
async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(
|
||||
f"""id: {detail_id}. Status Code: {response.status}."""
|
||||
f"""Failed due to: {await response.text()}"""
|
||||
start_time = time.time()
|
||||
try:
|
||||
async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
|
||||
response_time = time.time() - start_time
|
||||
body = await response.json() if response.status == 200 else None
|
||||
|
||||
# Validate response for throttling
|
||||
validate_response(
|
||||
response,
|
||||
response_time,
|
||||
body,
|
||||
config.slow_response_threshold,
|
||||
expect_data=True,
|
||||
)
|
||||
return await response.json()
|
||||
|
||||
if response.status != 200:
|
||||
raise Exception(
|
||||
f"""id: {detail_id}. Status Code: {response.status}."""
|
||||
f"""Failed due to: {await response.text()}"""
|
||||
)
|
||||
|
||||
if cb is not None:
|
||||
cb.record_success()
|
||||
return body # type: ignore
|
||||
except ThrottlingError:
|
||||
if cb is not None:
|
||||
cb.record_failure()
|
||||
raise
|
||||
except Exception as e:
|
||||
if cb is not None:
|
||||
cb.record_failure()
|
||||
raise e
|
||||
|
||||
if session:
|
||||
return await do_request(session)
|
||||
|
|
@ -101,7 +206,11 @@ async def detail_query(
|
|||
return await do_request(new_session)
|
||||
|
||||
|
||||
@retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
|
||||
@retry(
|
||||
retry=retry_if_exception_type(ThrottlingError),
|
||||
wait=wait_exponential(multiplier=2, min=2, max=120),
|
||||
stop=stop_after_attempt(5),
|
||||
)
|
||||
async def listing_query(
|
||||
*,
|
||||
page: int,
|
||||
|
|
@ -118,6 +227,7 @@ async def listing_query(
|
|||
page_size: int = 25,
|
||||
furnish_types: list[FurnishType] = [],
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
config: ScraperConfig | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Execute a listing search query.
|
||||
|
||||
|
|
@ -136,10 +246,21 @@ async def listing_query(
|
|||
page_size: Number of results per page (default 25).
|
||||
furnish_types: List of furnish types to filter (RENT only).
|
||||
session: Optional aiohttp session. Creates new one if not provided.
|
||||
config: Scraper configuration. Loads from environment if not provided.
|
||||
|
||||
Returns:
|
||||
API response as a dictionary.
|
||||
|
||||
Raises:
|
||||
CircuitBreakerOpenError: If the circuit breaker is open.
|
||||
ThrottlingError: If the request is throttled.
|
||||
"""
|
||||
if config is None:
|
||||
config = ScraperConfig.from_env()
|
||||
|
||||
check_circuit_breaker(config)
|
||||
cb = get_circuit_breaker(config)
|
||||
|
||||
params: dict[str, str] = {
|
||||
"locationIdentifier": districts.get_districts()[district],
|
||||
"channel": str(channel).upper(),
|
||||
|
|
@ -185,14 +306,39 @@ async def listing_query(
|
|||
}
|
||||
|
||||
async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
|
||||
async with s.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
params=params,
|
||||
headers=request_headers,
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f"Failed due to: {await response.text()}")
|
||||
return await response.json()
|
||||
start_time = time.time()
|
||||
try:
|
||||
async with s.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
params=params,
|
||||
headers=request_headers,
|
||||
) as response:
|
||||
response_time = time.time() - start_time
|
||||
body = await response.json() if response.status == 200 else None
|
||||
|
||||
# Validate response for throttling
|
||||
validate_response(
|
||||
response,
|
||||
response_time,
|
||||
body,
|
||||
config.slow_response_threshold,
|
||||
expect_data=(page == 1), # Only expect data on first page
|
||||
)
|
||||
|
||||
if response.status != 200:
|
||||
raise Exception(f"Failed due to: {await response.text()}")
|
||||
|
||||
if cb is not None:
|
||||
cb.record_success()
|
||||
return body # type: ignore
|
||||
except ThrottlingError:
|
||||
if cb is not None:
|
||||
cb.record_failure()
|
||||
raise
|
||||
except Exception as e:
|
||||
if cb is not None:
|
||||
cb.record_failure()
|
||||
raise e
|
||||
|
||||
if session:
|
||||
return await do_request(session)
|
||||
|
|
@ -201,7 +347,11 @@ async def listing_query(
|
|||
return await do_request(new_session)
|
||||
|
||||
|
||||
@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3))
|
||||
@retry(
|
||||
retry=retry_if_exception_type(ThrottlingError),
|
||||
wait=wait_exponential(multiplier=2, min=2, max=60),
|
||||
stop=stop_after_attempt(5),
|
||||
)
|
||||
async def probe_query(
|
||||
*,
|
||||
session: aiohttp.ClientSession,
|
||||
|
|
@ -214,6 +364,7 @@ async def probe_query(
|
|||
district: str,
|
||||
max_days_since_added: int = 30,
|
||||
furnish_types: list[FurnishType] = [],
|
||||
config: ScraperConfig | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Probe the API to get result count without fetching full results.
|
||||
|
||||
|
|
@ -230,10 +381,21 @@ async def probe_query(
|
|||
district: District identifier string.
|
||||
max_days_since_added: Maximum days since listing was added (BUY only).
|
||||
furnish_types: List of furnish types to filter (RENT only).
|
||||
config: Scraper configuration. Loads from environment if not provided.
|
||||
|
||||
Returns:
|
||||
API response containing totalAvailableResults.
|
||||
|
||||
Raises:
|
||||
CircuitBreakerOpenError: If the circuit breaker is open.
|
||||
ThrottlingError: If the request is throttled.
|
||||
"""
|
||||
if config is None:
|
||||
config = ScraperConfig.from_env()
|
||||
|
||||
check_circuit_breaker(config)
|
||||
cb = get_circuit_breaker(config)
|
||||
|
||||
params: dict[str, str] = {
|
||||
"locationIdentifier": districts.get_districts()[district],
|
||||
"channel": str(channel).upper(),
|
||||
|
|
@ -271,11 +433,36 @@ async def probe_query(
|
|||
"Connection": "keep-alive",
|
||||
}
|
||||
|
||||
async with session.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
params=params,
|
||||
headers=request_headers,
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f"Probe failed: {await response.text()}")
|
||||
return await response.json()
|
||||
start_time = time.time()
|
||||
try:
|
||||
async with session.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
params=params,
|
||||
headers=request_headers,
|
||||
) as response:
|
||||
response_time = time.time() - start_time
|
||||
body = await response.json() if response.status == 200 else None
|
||||
|
||||
# Validate response for throttling
|
||||
validate_response(
|
||||
response,
|
||||
response_time,
|
||||
body,
|
||||
config.slow_response_threshold,
|
||||
expect_data=False, # Probe doesn't need data, just count
|
||||
)
|
||||
|
||||
if response.status != 200:
|
||||
raise Exception(f"Probe failed: {await response.text()}")
|
||||
|
||||
if cb is not None:
|
||||
cb.record_success()
|
||||
return body # type: ignore
|
||||
except ThrottlingError:
|
||||
if cb is not None:
|
||||
cb.record_failure()
|
||||
raise
|
||||
except Exception as e:
|
||||
if cb is not None:
|
||||
cb.record_failure()
|
||||
raise e
|
||||
|
|
|
|||
232
crawler/rec/throttle_detector.py
Normal file
232
crawler/rec/throttle_detector.py
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
"""Throttling detection and metrics for Rightmove API."""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
|
||||
from rec.exceptions import (
|
||||
InvalidResponseError,
|
||||
IPBlockedError,
|
||||
RateLimitError,
|
||||
ServiceUnavailableError,
|
||||
SlowResponseError,
|
||||
UnexpectedEmptyResponseError,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class ThrottleMetrics:
    """Tracks throttling events and metrics.

    Every ``record_*`` method counts exactly one request in
    ``total_requests``, so ``throttle_rate`` is a true percentage of all
    requests and can never exceed 100. Only ``record_request`` and
    ``record_slow_response`` receive a response time; the other outcomes are
    counted as requests but excluded from ``average_response_time``.

    Attributes:
        rate_limit_count: Number of HTTP 429 errors.
        service_unavailable_count: Number of HTTP 503 errors.
        ip_blocked_count: Number of HTTP 403 errors.
        slow_response_count: Number of slow responses.
        empty_response_count: Number of unexpected empty responses.
        invalid_response_count: Number of invalid/error responses.
        total_requests: Total number of requests recorded, whatever the outcome.
        total_response_time: Cumulative response time in seconds of the
            requests that were recorded with a timing.
    """

    rate_limit_count: int = 0
    service_unavailable_count: int = 0
    ip_blocked_count: int = 0
    slow_response_count: int = 0
    empty_response_count: int = 0
    invalid_response_count: int = 0
    total_requests: int = 0
    total_response_time: float = 0.0
    _start_time: float = field(default_factory=time.time)

    def record_rate_limit(self) -> None:
        """Record a rate limit error (HTTP 429)."""
        self.rate_limit_count += 1
        self.total_requests += 1

    def record_service_unavailable(self) -> None:
        """Record a service unavailable error (HTTP 503)."""
        self.service_unavailable_count += 1
        self.total_requests += 1

    def record_ip_blocked(self) -> None:
        """Record an IP blocked error (HTTP 403)."""
        self.ip_blocked_count += 1
        self.total_requests += 1

    def record_slow_response(self, response_time: float) -> None:
        """Record a slow response.

        Args:
            response_time: Response time in seconds.
        """
        self.slow_response_count += 1
        self.total_response_time += response_time
        self.total_requests += 1

    def record_empty_response(self) -> None:
        """Record an unexpected empty response."""
        self.empty_response_count += 1
        self.total_requests += 1

    def record_invalid_response(self) -> None:
        """Record an invalid or error response."""
        self.invalid_response_count += 1
        self.total_requests += 1

    def record_request(self, response_time: float) -> None:
        """Record a successful request.

        Args:
            response_time: Response time in seconds.
        """
        self.total_requests += 1
        self.total_response_time += response_time

    @property
    def _untimed_requests(self) -> int:
        """Requests counted in ``total_requests`` without a response time."""
        return (
            self.rate_limit_count
            + self.service_unavailable_count
            + self.ip_blocked_count
            + self.empty_response_count
            + self.invalid_response_count
        )

    @property
    def average_response_time(self) -> float:
        """Average response time in seconds over the requests that were timed."""
        # Untimed outcomes add nothing to total_response_time, so keeping them
        # in the denominator would drag the average towards zero.
        timed_requests = self.total_requests - self._untimed_requests
        if timed_requests <= 0:
            return 0.0
        return self.total_response_time / timed_requests

    @property
    def total_throttling_events(self) -> int:
        """Total number of throttling events (429, 503, 403 and slow responses)."""
        return (
            self.rate_limit_count
            + self.service_unavailable_count
            + self.ip_blocked_count
            + self.slow_response_count
        )

    @property
    def throttle_rate(self) -> float:
        """Percentage of requests that were throttled."""
        if self.total_requests == 0:
            return 0.0
        return (self.total_throttling_events / self.total_requests) * 100

    @property
    def elapsed_time(self) -> float:
        """Time elapsed since metrics started tracking."""
        return time.time() - self._start_time

    def summary(self) -> str:
        """Generate a summary of throttling metrics."""
        return (
            f"Throttle Metrics Summary:\n"
            f"  Total Requests: {self.total_requests}\n"
            f"  Total Throttling Events: {self.total_throttling_events}\n"
            f"  Throttle Rate: {self.throttle_rate:.2f}%\n"
            f"  Rate Limit (429): {self.rate_limit_count}\n"
            f"  Service Unavailable (503): {self.service_unavailable_count}\n"
            f"  IP Blocked (403): {self.ip_blocked_count}\n"
            f"  Slow Responses: {self.slow_response_count}\n"
            f"  Empty Responses: {self.empty_response_count}\n"
            f"  Invalid Responses: {self.invalid_response_count}\n"
            f"  Average Response Time: {self.average_response_time:.2f}s\n"
            f"  Elapsed Time: {self.elapsed_time:.2f}s"
        )
|
||||
|
||||
|
||||
# Global metrics instance
|
||||
_global_metrics: ThrottleMetrics | None = None
|
||||
|
||||
|
||||
def get_throttle_metrics() -> ThrottleMetrics:
    """Return the shared ThrottleMetrics instance, creating it on first use.

    Returns:
        Global ThrottleMetrics instance.
    """
    global _global_metrics

    if _global_metrics is not None:
        return _global_metrics

    _global_metrics = ThrottleMetrics()
    return _global_metrics
|
||||
|
||||
|
||||
def reset_throttle_metrics() -> None:
    """Replace the shared metrics object with a fresh, zeroed one."""
    global _global_metrics

    fresh_metrics = ThrottleMetrics()
    _global_metrics = fresh_metrics
|
||||
|
||||
|
||||
def validate_response(
    response: aiohttp.ClientResponse,
    response_time: float,
    response_body: dict[str, Any] | None,
    slow_response_threshold: float,
    expect_data: bool = True,
) -> None:
    """Validate an API response and raise appropriate exceptions for throttling.

    Checks run in this order and the first one that fails raises: HTTP status
    (429, 503, 403), response time, error markers in the body, then an
    unexpectedly empty result set. Each outcome is recorded on the global
    throttle metrics before raising; a response that passes every check is
    recorded via ``record_request``.

    Args:
        response: The aiohttp response object; only ``status`` is read.
        response_time: Time taken for the request in seconds.
        response_body: Parsed JSON response body (if available).
        slow_response_threshold: Threshold in seconds for slow responses.
        expect_data: Whether we expect data in the response.

    Raises:
        RateLimitError: If HTTP 429 is returned.
        ServiceUnavailableError: If HTTP 503 is returned.
        IPBlockedError: If HTTP 403 is returned.
        SlowResponseError: If response time exceeds threshold.
        UnexpectedEmptyResponseError: If response is empty when data is expected.
        InvalidResponseError: If response contains error messages.
    """
    metrics = get_throttle_metrics()

    # Check HTTP status codes
    if response.status == 429:
        metrics.record_rate_limit()
        raise RateLimitError(
            f"Rate limit exceeded (HTTP 429). "
            f"Response time: {response_time:.2f}s"
        )

    if response.status == 503:
        metrics.record_service_unavailable()
        raise ServiceUnavailableError(
            f"Service unavailable (HTTP 503). "
            f"Response time: {response_time:.2f}s"
        )

    if response.status == 403:
        metrics.record_ip_blocked()
        raise IPBlockedError(
            f"Access forbidden, possible IP block (HTTP 403). "
            f"Response time: {response_time:.2f}s"
        )

    # Check response time
    if response_time > slow_response_threshold:
        metrics.record_slow_response(response_time)
        raise SlowResponseError(
            f"Slow response detected: {response_time:.2f}s "
            f"(threshold: {slow_response_threshold}s)"
        )

    # Check response body if available
    if response_body is not None:
        # Check for error messages
        if "error" in response_body or "GENERIC_ERROR" in str(response_body):
            metrics.record_invalid_response()
            raise InvalidResponseError(
                f"Error in response body: {response_body}"
            )

        # Check for unexpected empty responses
        if expect_data:
            # A key present with JSON null comes back as None rather than the
            # .get() default, so coalesce before calling len() or comparing.
            properties = response_body.get("properties") or []
            total_results = response_body.get("totalAvailableResults") or 0

            # If we expect data but got none (and total shows there should be some)
            if total_results > 0 and len(properties) == 0:
                metrics.record_empty_response()
                raise UnexpectedEmptyResponseError(
                    f"Expected data but got empty response. "
                    f"Total available: {total_results}"
                )

    # Record successful request
    metrics.record_request(response_time)
|
||||
Loading…
Add table
Add a link
Reference in a new issue