wrongmove/crawler/rec/query.py

281 lines
8.8 KiB
Python

import enum
from typing import Any
from contextlib import asynccontextmanager
from collections.abc import AsyncIterator
import aiohttp
from models.listing import FurnishType, ListingType
from rec import districts
from tenacity import retry, stop_after_attempt, wait_random
from config.scraper_config import ScraperConfig
# Default HTTP headers for requests to the Rightmove API host.
DEFAULT_HEADERS: dict[str, str] = {
    "Host": "api.rightmove.co.uk",
    "User-Agent": "okhttp/4.12.0",
    "Connection": "keep-alive",
}
class PropertyType(enum.StrEnum):
BUNGALOW = "bungalow"
DETACHED = "detached"
FLAT = "flat"
LAND = "land"
PARK_HOME = "park-home"
SEMI_DETACHED = "semi-detached"
TERRACED = "terraced"
@asynccontextmanager
async def create_session(
    config: ScraperConfig | None = None,
) -> AsyncIterator[aiohttp.ClientSession]:
    """Create an aiohttp session with optional proxy support.

    The session is closed automatically when the ``async with`` block exits,
    whether normally or through an exception.

    Args:
        config: Scraper configuration. Loaded via ``ScraperConfig.from_env()``
            if not provided.

    Yields:
        An aiohttp ClientSession configured with ``DEFAULT_HEADERS`` and, when
        ``config.proxy_url`` is set, a proxy connector built from that URL.

    Raises:
        ImportError: If a proxy URL is configured but ``aiohttp-socks`` is not
            installed. The original import failure is chained as the cause.
    """
    if config is None:
        config = ScraperConfig.from_env()

    connector = None
    if config.proxy_url:
        # Imported lazily so aiohttp-socks is only required when a proxy is
        # actually configured. Only the import is guarded: errors from
        # building the connector should surface unchanged.
        try:
            from aiohttp_socks import ProxyConnector
        except ImportError as err:
            raise ImportError(
                "aiohttp-socks is required for proxy support. "
                "Install with: pip install aiohttp-socks"
            ) from err
        connector = ProxyConnector.from_url(config.proxy_url)

    async with aiohttp.ClientSession(
        trust_env=True,
        connector=connector,
        headers=DEFAULT_HEADERS,
    ) as session:
        yield session
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def detail_query(
    detail_id: int,
    session: aiohttp.ClientSession | None = None,
) -> dict[str, Any]:
    """Fetch detailed property information.

    The call is retried (up to three attempts, with a random 1-2 second wait
    between them) whenever it raises.

    Args:
        detail_id: The property identifier, interpolated into the request URL.
        session: Optional aiohttp session. A temporary session is created and
            closed for this call if not provided.

    Returns:
        The decoded JSON response body.

    Raises:
        Exception: On any non-200 response; the message carries the id, the
            status code and the response text. Because ``reraise`` is not set
            on the retry decorator, callers see tenacity's ``RetryError``
            once the final attempt has failed.
    """
    params = {
        "apiApplication": "ANDROID",
        "appVersion": "3.70.0",
    }
    url = f"https://api.rightmove.co.uk/api/property/{detail_id}"

    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
        async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
            if response.status != 200:
                # Trailing space keeps the two sentences from running together.
                raise Exception(
                    f"id: {detail_id}. Status Code: {response.status}. "
                    f"Failed due to: {await response.text()}"
                )
            return await response.json()

    if session is not None:
        return await do_request(session)

    # NOTE(review): this fallback session does not go through create_session,
    # so any proxy configured there is not applied here - confirm intended.
    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await do_request(new_session)
@retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
async def listing_query(
    *,
    page: int,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,
    mustNewHome: bool = False,
    max_days_since_added: int | None = 30,
    property_type: list[PropertyType] | None = None,
    page_size: int = 25,
    furnish_types: list[FurnishType] | None = None,
    session: aiohttp.ClientSession | None = None,
) -> dict[str, Any]:
    """Execute a listing search query.

    The call is retried (up to three attempts, with a random 1-60 second wait
    between them) whenever it raises - including on the validation error
    described under Raises.

    Args:
        page: Page number to fetch (1-indexed).
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: Key looked up in ``districts.get_districts()`` to obtain the
            API's ``locationIdentifier``.
        mustNewHome: Filter for new homes only (BUY only).
        max_days_since_added: Maximum days since listing was added (BUY only).
            Must be one of 1, 3, 7 or 14; ``None`` omits the filter.
        property_type: Property types to filter by (BUY only). ``None`` or an
            empty list applies no filter.
        page_size: Number of results per page (default 25).
        furnish_types: Furnish types to filter by (RENT only). ``None`` or an
            empty list applies no filter.
        session: Optional aiohttp session. A temporary session is created and
            closed for this call if not provided.

    Returns:
        The decoded JSON response body.

    Raises:
        Exception: If ``channel`` is BUY and ``max_days_since_added`` is not
            ``None`` and not one of 1, 3, 7, 14; or on any non-200 response.
            Because ``reraise`` is not set on the retry decorator, callers see
            tenacity's ``RetryError`` once the final attempt has failed.
    """
    params: dict[str, str] = {
        "locationIdentifier": districts.get_districts()[district],
        "channel": str(channel).upper(),
        "page": str(page),
        "numberOfPropertiesPerPage": str(page_size),
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "4.28.0",
    }

    if channel is ListingType.BUY:
        params["dontShow"] = "sharedOwnership,retirement"
        if property_type:
            params["propertyTypes"] = ",".join(property_type)
        # NOTE(review): the default of 30 is not in the accepted set, so BUY
        # callers must pass max_days_since_added explicitly (or None).
        if max_days_since_added is not None:
            if max_days_since_added not in (1, 3, 7, 14):
                raise Exception(
                    f"Invalid max days - {max_days_since_added} Can only be got",
                    [1, 3, 7, 14],
                )
            # Only sent when a value was given; None previously went out as
            # the literal string "None".
            params["maxDaysSinceAdded"] = str(max_days_since_added)
        if mustNewHome:
            params["mustHave"] = "newHome"

    if channel is ListingType.RENT and furnish_types:
        params["furnishTypes"] = ",".join(furnish_types)

    request_headers = {
        "Host": "api.rightmove.co.uk",
        "Accept-Encoding": "gzip, deflate, br",
        "User-Agent": "okhttp/4.12.0",
        "Connection": "keep-alive",
    }

    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
        async with s.get(
            "https://api.rightmove.co.uk/api/property-listing",
            params=params,
            headers=request_headers,
        ) as response:
            if response.status != 200:
                raise Exception(f"Failed due to: {await response.text()}")
            return await response.json()

    if session is not None:
        return await do_request(session)

    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await do_request(new_session)
@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3))
async def probe_query(
    *,
    session: aiohttp.ClientSession,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,
    max_days_since_added: int | None = 30,
    furnish_types: list[FurnishType] | None = None,
) -> dict[str, Any]:
    """Probe the API to get result count without fetching full results.

    Makes a minimal request (first page, page size 1) to efficiently get
    totalAvailableResults. The call is retried (up to three attempts, with a
    random 1-10 second wait between them) whenever it raises.

    Args:
        session: aiohttp session for making requests.
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: Key looked up in ``districts.get_districts()`` to obtain the
            API's ``locationIdentifier``.
        max_days_since_added: Maximum days since listing was added (BUY only).
            Sent only when it is one of 1, 3, 7 or 14; any other value
            (including the default of 30 and ``None``) is silently left out.
        furnish_types: Furnish types to filter by (RENT only). ``None`` or an
            empty list applies no filter.

    Returns:
        API response containing totalAvailableResults.

    Raises:
        Exception: On any non-200 response. Because ``reraise`` is not set on
            the retry decorator, callers see tenacity's ``RetryError`` once
            the final attempt has failed.
    """
    params: dict[str, str] = {
        "locationIdentifier": districts.get_districts()[district],
        "channel": str(channel).upper(),
        "page": "1",
        "numberOfPropertiesPerPage": "1",  # Minimal page size for probing
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "4.28.0",
    }

    if channel is ListingType.BUY:
        params["dontShow"] = "sharedOwnership,retirement"
        # Unsupported values are dropped rather than rejected, so the probe
        # still runs, just without the recency filter.
        if max_days_since_added is not None and max_days_since_added in (1, 3, 7, 14):
            params["maxDaysSinceAdded"] = str(max_days_since_added)

    if channel is ListingType.RENT and furnish_types:
        params["furnishTypes"] = ",".join(furnish_types)

    request_headers = {
        "Host": "api.rightmove.co.uk",
        "Accept-Encoding": "gzip, deflate, br",
        "User-Agent": "okhttp/4.12.0",
        "Connection": "keep-alive",
    }

    async with session.get(
        "https://api.rightmove.co.uk/api/property-listing",
        params=params,
        headers=request_headers,
    ) as response:
        if response.status != 200:
            raise Exception(f"Probe failed: {await response.text()}")
        return await response.json()