Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -28,6 +28,11 @@ logger = logging.getLogger("uvicorn.error")
# Global circuit breaker instance
_circuit_breaker: CircuitBreaker | None = None
# API constants
# Sent as the "appVersion" query parameter by detail_query.
ANDROID_APP_VERSION = "3.70.0"
# Sent as the "appVersion" query parameter by _build_base_params, i.e. on
# every property-listing request (full listing pages and probes).
ANDROID_APP_VERSION_LISTING = "4.28.0"
# Root of the API; the property-detail and property-listing URLs are built from it.
RIGHTMOVE_API_BASE = "https://api.rightmove.co.uk/api"
# URL requested by listing_query and probe_query.
PROPERTY_LISTING_ENDPOINT = f"{RIGHTMOVE_API_BASE}/property-listing"
DEFAULT_HEADERS = {
"Host": "api.rightmove.co.uk",
@ -35,6 +40,11 @@ DEFAULT_HEADERS = {
"Connection": "keep-alive",
}
# Headers passed by listing_query and probe_query: a copy of DEFAULT_HEADERS
# with "Accept-Encoding" added (or overridden, should DEFAULT_HEADERS define it).
LISTING_HEADERS = {
**DEFAULT_HEADERS,
"Accept-Encoding": "gzip, deflate, br",
}
class PropertyType(enum.StrEnum):
BUNGALOW = "bungalow"
@ -129,6 +139,177 @@ def check_circuit_breaker(config: ScraperConfig | None = None) -> None:
cb.call()
def _build_base_params(
    *,
    channel: ListingType,
    page: int,
    page_size: int,
    radius: float,
    min_price: int,
    max_price: int,
    min_bedrooms: int,
    max_bedrooms: int,
    district: str,
) -> dict[str, str]:
    """Return the query parameters common to every property-listing request.

    All values are strings. The district name is translated to a location
    identifier by subscripting ``districts.get_districts()``; whatever that
    lookup raises for an unknown district propagates to the caller.

    Args:
        channel: Listing channel; sent upper-cased.
        page: Page number to request.
        page_size: Number of properties per page.
        radius: Search radius around the location.
        min_price: Lower price bound.
        max_price: Upper price bound.
        min_bedrooms: Lower bedroom bound.
        max_bedrooms: Upper bedroom bound.
        district: Key into the mapping returned by ``districts.get_districts()``.

    Returns:
        A new dict of query parameters, safe for the caller to extend.
    """
    # Resolve the location first so a bad district fails before anything else.
    location_identifier = districts.get_districts()[district]

    # Keys are inserted in the order the request has always sent them.
    query: dict[str, str] = {"locationIdentifier": location_identifier}
    query["channel"] = str(channel).upper()
    query["page"] = str(page)
    query["numberOfPropertiesPerPage"] = str(page_size)
    query["radius"] = str(radius)
    query["sortBy"] = "distance"
    query["includeUnavailableProperties"] = "false"
    query["minPrice"] = str(min_price)
    query["maxPrice"] = str(max_price)
    query["minBedrooms"] = str(min_bedrooms)
    query["maxBedrooms"] = str(max_bedrooms)
    query["apiApplication"] = "ANDROID"
    query["appVersion"] = ANDROID_APP_VERSION_LISTING
    return query
# Build the query parameters for one page of a property-listing search.
#
# Starts from _build_base_params(...) and layers the optional filters below on
# top of it; every value written is a string. Raises a plain Exception when the
# max_days_since_added check below is reached with a value that is neither
# None nor one of 1, 3, 7, 14.
#
# NOTE(review): this view of the file has lost its indentation, so which of the
# statements after `if channel is ListingType.BUY:` belong to that branch
# cannot be determined from here - confirm against the real file.
def _build_listing_params(
*,
page: int,
channel: ListingType,
min_bedrooms: int,
max_bedrooms: int,
radius: float,
min_price: int,
max_price: int,
district: str,
mustNewHome: bool,
max_days_since_added: int,
property_type: list[PropertyType],
page_size: int,
furnish_types: list[FurnishType],
) -> dict[str, str]:
# Shared parameters (location, channel, paging, price and bedroom bounds).
params = _build_base_params(
channel=channel,
page=page,
page_size=page_size,
radius=radius,
min_price=min_price,
max_price=max_price,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
district=district,
)
# Buy searches ask the API not to show shared-ownership and retirement listings.
if channel is ListingType.BUY:
params["dontShow"] = "sharedOwnership,retirement"
# Property types are sent as a single comma-separated value.
if len(property_type) > 0:
params["propertyTypes"] = ",".join(property_type)
# Only these look-back windows are accepted; any other non-None value is rejected.
# NOTE(review): listing_query defaults max_days_since_added to 30, which this
# check rejects whenever it is reached - confirm that is intended.
if max_days_since_added is not None and max_days_since_added not in [
1,
3,
7,
14,
]:
raise Exception(
f"Invalid max days - {max_days_since_added} Can only be got",
[1, 3, 7, 14],
)
# NOTE(review): None passes the check above, so this line would send the
# literal string "None" - confirm None is never passed in practice.
params["maxDaysSinceAdded"] = str(max_days_since_added)
if mustNewHome:
params["mustHave"] = "newHome"
# Furnishing filter is only applied to rent searches, comma-separated.
if channel is ListingType.RENT:
if furnish_types:
params["furnishTypes"] = ",".join(furnish_types)
return params
# Build the query parameters for a probe request: the same search as a listing
# query, but fixed to page 1 with a page size of 1.
#
# Unlike _build_listing_params, an unsupported max_days_since_added is not an
# error here: the "maxDaysSinceAdded" parameter is simply left out unless the
# value is one of 1, 3, 7, 14.
#
# NOTE(review): this view of the file has lost its indentation, so whether the
# max_days_since_added filter is applied for every channel or only inside the
# BUY branch cannot be determined from here - confirm against the real file.
def _build_probe_params(
*,
channel: ListingType,
min_bedrooms: int,
max_bedrooms: int,
radius: float,
min_price: int,
max_price: int,
district: str,
max_days_since_added: int,
furnish_types: list[FurnishType],
) -> dict[str, str]:
# Shared parameters (location, channel, price and bedroom bounds).
params = _build_base_params(
channel=channel,
page=1,
page_size=1, # Minimal page size for probing
radius=radius,
min_price=min_price,
max_price=max_price,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
district=district,
)
# Buy searches ask the API not to show shared-ownership and retirement listings.
if channel is ListingType.BUY:
params["dontShow"] = "sharedOwnership,retirement"
# Forward the look-back window only when it is one of the listed values.
if max_days_since_added is not None and max_days_since_added in [
1,
3,
7,
14,
]:
params["maxDaysSinceAdded"] = str(max_days_since_added)
# Furnishing filter is only applied to rent searches, comma-separated.
if channel is ListingType.RENT:
if furnish_types:
params["furnishTypes"] = ",".join(furnish_types)
return params
async def _execute_api_request(
    *,
    url: str,
    params: dict[str, str],
    headers: dict[str, str],
    session: aiohttp.ClientSession | None,
    config: ScraperConfig,
    expect_data: bool = True,
    error_context: str = "",
) -> dict[str, Any]:
    """Send one GET request and report the outcome to the circuit breaker.

    ``check_circuit_breaker(config)`` runs before any network activity. The
    response is then passed to ``validate_response`` and, if the status is
    not 200, turned into an exception. Every exception raised while making
    or validating the request is recorded as a circuit-breaker failure and
    re-raised unchanged; a 200 response is recorded as a success.

    Args:
        url: Absolute URL to request.
        params: Query-string parameters.
        headers: HTTP headers to send.
        session: Session to reuse. When falsy, a temporary
            ``aiohttp.ClientSession(trust_env=True)`` is created for this one
            request and closed afterwards.
        config: Supplies ``slow_response_threshold`` and is used to obtain the
            circuit breaker.
        expect_data: Forwarded to ``validate_response``.
        error_context: Prefix for the message of the non-200 exception.

    Returns:
        The JSON-decoded body of the 200 response.

    Raises:
        ThrottlingError: Propagated unchanged (presumably raised by
            ``validate_response``; the callers' retry policy keys on this type).
        Exception: For any non-200 response that validation let through. The
            message is ``error_context`` followed by the HTTP status and the
            response text.
    """
    check_circuit_breaker(config)
    cb = get_circuit_breaker(config)

    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
        start_time = time.time()
        try:
            async with s.get(url, params=params, headers=headers) as response:
                # Elapsed time is taken as soon as the response object is
                # available, before the body is read.
                response_time = time.time() - start_time
                # Only a 200 response is decoded as JSON; anything else is
                # validated with body=None.
                body = await response.json() if response.status == 200 else None
                validate_response(
                    response,
                    response_time,
                    body,
                    config.slow_response_threshold,
                    expect_data=expect_data,
                )
                if response.status != 200:
                    # Include the status code: callers such as detail_query
                    # pass a context ending in "Status Code: ", which was
                    # previously left dangling with no value after it.
                    raise Exception(
                        f"{error_context}HTTP {response.status}. "
                        f"Failed due to: {await response.text()}"
                    )
                if cb is not None:
                    cb.record_success()
                return body  # type: ignore
        except ThrottlingError:
            # Kept as its own handler so the retry-relevant path stays explicit.
            if cb is not None:
                cb.record_failure()
            raise
        except Exception:
            if cb is not None:
                cb.record_failure()
            # Bare raise re-raises the same exception with its traceback intact.
            raise

    if session:
        return await do_request(session)

    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await do_request(new_session)
@retry(
retry=retry_if_exception_type(ThrottlingError),
wait=wait_exponential(multiplier=2, min=2, max=120),
@ -156,54 +337,21 @@ async def detail_query(
if config is None:
config = ScraperConfig.from_env()
check_circuit_breaker(config)
cb = get_circuit_breaker(config)
params = {
"apiApplication": "ANDROID",
"appVersion": "3.70.0",
"appVersion": ANDROID_APP_VERSION,
}
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
url = f"{RIGHTMOVE_API_BASE}/property/{detail_id}"
async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
start_time = time.time()
try:
async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
response_time = time.time() - start_time
body = await response.json() if response.status == 200 else None
# Validate response for throttling
validate_response(
response,
response_time,
body,
config.slow_response_threshold,
expect_data=True,
)
if response.status != 200:
raise Exception(
f"""id: {detail_id}. Status Code: {response.status}."""
f"""Failed due to: {await response.text()}"""
)
if cb is not None:
cb.record_success()
return body # type: ignore
except ThrottlingError:
if cb is not None:
cb.record_failure()
raise
except Exception as e:
if cb is not None:
cb.record_failure()
raise e
if session:
return await do_request(session)
else:
async with aiohttp.ClientSession(trust_env=True) as new_session:
return await do_request(new_session)
return await _execute_api_request(
url=url,
params=params,
headers=DEFAULT_HEADERS,
session=session,
config=config,
expect_data=True,
error_context=f"id: {detail_id}. Status Code: ",
)
@retry(
@ -223,9 +371,9 @@ async def listing_query(
district: str, # = "STATION^5168", # kings cross station
mustNewHome: bool = False,
max_days_since_added: int = 30,
property_type: list[PropertyType] = [],
property_type: list[PropertyType] | None = None,
page_size: int = 25,
furnish_types: list[FurnishType] = [],
furnish_types: list[FurnishType] | None = None,
session: aiohttp.ClientSession | None = None,
config: ScraperConfig | None = None,
) -> dict[str, Any]:
@ -257,94 +405,35 @@ async def listing_query(
"""
if config is None:
config = ScraperConfig.from_env()
if property_type is None:
property_type = []
if furnish_types is None:
furnish_types = []
check_circuit_breaker(config)
cb = get_circuit_breaker(config)
params = _build_listing_params(
page=page,
channel=channel,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
radius=radius,
min_price=min_price,
max_price=max_price,
district=district,
mustNewHome=mustNewHome,
max_days_since_added=max_days_since_added,
property_type=property_type,
page_size=page_size,
furnish_types=furnish_types,
)
params: dict[str, str] = {
"locationIdentifier": districts.get_districts()[district],
"channel": str(channel).upper(),
"page": str(page),
"numberOfPropertiesPerPage": str(page_size),
"radius": str(radius),
"sortBy": "distance",
"includeUnavailableProperties": "false",
"minPrice": str(min_price),
"maxPrice": str(max_price),
"minBedrooms": str(min_bedrooms),
"maxBedrooms": str(max_bedrooms),
"apiApplication": "ANDROID",
"appVersion": "4.28.0",
}
if channel is ListingType.BUY:
params["dontShow"] = "sharedOwnership,retirement"
if len(property_type) > 0:
params["propertyTypes"] = ",".join(property_type)
if max_days_since_added is not None and max_days_since_added not in [
1,
3,
7,
14,
]:
raise Exception(
f"Invalid max days - {max_days_since_added} Can only be got",
[1, 3, 7, 14],
)
params["maxDaysSinceAdded"] = str(max_days_since_added)
if mustNewHome:
params["mustHave"] = "newHome"
if channel is ListingType.RENT:
if furnish_types:
params["furnishTypes"] = ",".join(furnish_types)
request_headers = {
"Host": "api.rightmove.co.uk",
"Accept-Encoding": "gzip, deflate, br",
"User-Agent": "okhttp/4.12.0",
"Connection": "keep-alive",
}
async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
start_time = time.time()
try:
async with s.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
headers=request_headers,
) as response:
response_time = time.time() - start_time
body = await response.json() if response.status == 200 else None
# Validate response for throttling
validate_response(
response,
response_time,
body,
config.slow_response_threshold,
expect_data=(page == 1), # Only expect data on first page
)
if response.status != 200:
raise Exception(f"Failed due to: {await response.text()}")
if cb is not None:
cb.record_success()
return body # type: ignore
except ThrottlingError:
if cb is not None:
cb.record_failure()
raise
except Exception as e:
if cb is not None:
cb.record_failure()
raise e
if session:
return await do_request(session)
else:
async with aiohttp.ClientSession(trust_env=True) as new_session:
return await do_request(new_session)
return await _execute_api_request(
url=PROPERTY_LISTING_ENDPOINT,
params=params,
headers=LISTING_HEADERS,
session=session,
config=config,
expect_data=(page == 1),
)
@retry(
@ -363,7 +452,7 @@ async def probe_query(
max_price: int,
district: str,
max_days_since_added: int = 30,
furnish_types: list[FurnishType] = [],
furnish_types: list[FurnishType] | None = None,
config: ScraperConfig | None = None,
) -> dict[str, Any]:
"""Probe the API to get result count without fetching full results.
@ -392,77 +481,27 @@ async def probe_query(
"""
if config is None:
config = ScraperConfig.from_env()
if furnish_types is None:
furnish_types = []
check_circuit_breaker(config)
cb = get_circuit_breaker(config)
params = _build_probe_params(
channel=channel,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
radius=radius,
min_price=min_price,
max_price=max_price,
district=district,
max_days_since_added=max_days_since_added,
furnish_types=furnish_types,
)
params: dict[str, str] = {
"locationIdentifier": districts.get_districts()[district],
"channel": str(channel).upper(),
"page": "1",
"numberOfPropertiesPerPage": "1", # Minimal page size for probing
"radius": str(radius),
"sortBy": "distance",
"includeUnavailableProperties": "false",
"minPrice": str(min_price),
"maxPrice": str(max_price),
"minBedrooms": str(min_bedrooms),
"maxBedrooms": str(max_bedrooms),
"apiApplication": "ANDROID",
"appVersion": "4.28.0",
}
if channel is ListingType.BUY:
params["dontShow"] = "sharedOwnership,retirement"
if max_days_since_added is not None and max_days_since_added in [
1,
3,
7,
14,
]:
params["maxDaysSinceAdded"] = str(max_days_since_added)
if channel is ListingType.RENT:
if furnish_types:
params["furnishTypes"] = ",".join(furnish_types)
request_headers = {
"Host": "api.rightmove.co.uk",
"Accept-Encoding": "gzip, deflate, br",
"User-Agent": "okhttp/4.12.0",
"Connection": "keep-alive",
}
start_time = time.time()
try:
async with session.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
headers=request_headers,
) as response:
response_time = time.time() - start_time
body = await response.json() if response.status == 200 else None
# Validate response for throttling
validate_response(
response,
response_time,
body,
config.slow_response_threshold,
expect_data=False, # Probe doesn't need data, just count
)
if response.status != 200:
raise Exception(f"Probe failed: {await response.text()}")
if cb is not None:
cb.record_success()
return body # type: ignore
except ThrottlingError:
if cb is not None:
cb.record_failure()
raise
except Exception as e:
if cb is not None:
cb.record_failure()
raise e
return await _execute_api_request(
url=PROPERTY_LISTING_ENDPOINT,
params=params,
headers=LISTING_HEADERS,
session=session,
config=config,
expect_data=False,
error_context="Probe failed: ",
)