"""Async helpers for querying the Rightmove API (api.rightmove.co.uk)."""

import enum
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from typing import Any

import aiohttp
from models.listing import FurnishType, ListingType
from rec import districts
from tenacity import retry, stop_after_attempt, wait_random

from config.scraper_config import ScraperConfig

# Headers attached to every session created by ``create_session`` and to
# property-detail requests.
DEFAULT_HEADERS = {
    "Host": "api.rightmove.co.uk",
    "User-Agent": "okhttp/4.12.0",
    "Connection": "keep-alive",
}

# Headers sent with property-listing searches. Kept as its own literal (rather
# than derived from DEFAULT_HEADERS) so the header order stays exactly as it
# was when each query function built this dict inline.
_LISTING_HEADERS = {
    "Host": "api.rightmove.co.uk",
    "Accept-Encoding": "gzip, deflate, br",
    "User-Agent": "okhttp/4.12.0",
    "Connection": "keep-alive",
}

_LISTING_URL = "https://api.rightmove.co.uk/api/property-listing"

# The only values this module forwards as ``maxDaysSinceAdded``.
_ALLOWED_MAX_DAYS = (1, 3, 7, 14)


class PropertyType(enum.StrEnum):
    """Property type values used for the ``propertyTypes`` search filter."""

    BUNGALOW = "bungalow"
    DETACHED = "detached"
    FLAT = "flat"
    LAND = "land"
    PARK_HOME = "park-home"
    SEMI_DETACHED = "semi-detached"
    TERRACED = "terraced"


@asynccontextmanager
async def create_session(
    config: ScraperConfig | None = None,
) -> AsyncIterator[aiohttp.ClientSession]:
    """Create an aiohttp session with optional proxy support.

    Args:
        config: Scraper configuration. Loads from environment if not provided.

    Yields:
        Configured aiohttp ClientSession. It is closed when the context exits.

    Raises:
        ImportError: If ``config.proxy_url`` is set but ``aiohttp-socks`` is
            not installed.
    """
    if config is None:
        config = ScraperConfig.from_env()

    connector = None
    if config.proxy_url:
        # aiohttp-socks is an optional dependency, so it is imported lazily
        # and only the import itself is guarded.
        try:
            from aiohttp_socks import ProxyConnector
        except ImportError as err:
            raise ImportError(
                "aiohttp-socks is required for proxy support. "
                "Install with: pip install aiohttp-socks"
            ) from err
        connector = ProxyConnector.from_url(config.proxy_url)

    async with aiohttp.ClientSession(
        trust_env=True,
        connector=connector,
        headers=DEFAULT_HEADERS,
    ) as session:
        yield session


@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def detail_query(
    detail_id: int,
    session: aiohttp.ClientSession | None = None,
) -> dict[str, Any]:
    """Fetch detailed property information.

    Args:
        detail_id: The property identifier.
        session: Optional aiohttp session. Creates new one if not provided.

    Returns:
        Property details as a dictionary.

    Raises:
        Exception: If the API answers with a non-200 status. The call is
            attempted up to three times by the ``retry`` decorator.
    """
    params = {
        "apiApplication": "ANDROID",
        "appVersion": "3.70.0",
    }
    url = f"https://api.rightmove.co.uk/api/property/{detail_id}"

    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
        async with s.get(url, params=params, headers=DEFAULT_HEADERS) as response:
            if response.status != 200:
                raise Exception(
                    f"id: {detail_id}. Status Code: {response.status}. "
                    f"Failed due to: {await response.text()}"
                )
            return await response.json()

    if session is not None:
        return await do_request(session)
    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await do_request(new_session)


def _base_search_params(
    *,
    district: str,
    channel: ListingType,
    page: int,
    page_size: int,
    radius: float,
    min_price: int,
    max_price: int,
    min_bedrooms: int,
    max_bedrooms: int,
) -> dict[str, str]:
    """Build the query parameters shared by listing and probe searches."""
    return {
        "locationIdentifier": districts.get_districts()[district],
        "channel": str(channel).upper(),
        "page": str(page),
        "numberOfPropertiesPerPage": str(page_size),
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "4.28.0",
    }


@retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
async def listing_query(
    *,
    page: int,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,  # = "STATION^5168",  # kings cross station
    mustNewHome: bool = False,
    max_days_since_added: int | None = 30,
    property_type: list[PropertyType] | None = None,
    page_size: int = 25,
    furnish_types: list[FurnishType] | None = None,
    session: aiohttp.ClientSession | None = None,
) -> dict[str, Any]:
    """Execute a listing search query.

    Args:
        page: Page number to fetch (1-indexed).
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: Key into ``districts.get_districts()``; the mapped value is
            sent as ``locationIdentifier``.
        mustNewHome: Filter for new homes only (BUY only).
        max_days_since_added: Maximum days since listing was added (BUY only).
            Must be one of 1, 3, 7, 14, or None to omit the filter. Note that
            the default (30) is not an accepted value, so BUY searches need
            to pass this explicitly.
        property_type: List of property types to filter (BUY only).
        page_size: Number of results per page (default 25).
        furnish_types: List of furnish types to filter (RENT only).
        session: Optional aiohttp session. Creates new one if not provided.

    Returns:
        API response as a dictionary.

    Raises:
        ValueError: If ``channel`` is BUY and ``max_days_since_added`` is
            neither None nor one of the accepted values.
        Exception: If the API answers with a non-200 status.

        Because the ``retry`` decorator wraps the whole function, both kinds
        of failure are attempted up to three times before being reported.
    """
    params = _base_search_params(
        district=district,
        channel=channel,
        page=page,
        page_size=page_size,
        radius=radius,
        min_price=min_price,
        max_price=max_price,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
    )

    if channel is ListingType.BUY:
        params["dontShow"] = "sharedOwnership,retirement"
        if property_type:
            params["propertyTypes"] = ",".join(property_type)
        if max_days_since_added is not None:
            if max_days_since_added not in _ALLOWED_MAX_DAYS:
                raise ValueError(
                    f"Invalid max_days_since_added: {max_days_since_added}. "
                    f"Must be one of {list(_ALLOWED_MAX_DAYS)} or None."
                )
            # Only sent when a real value was given; None means "no filter"
            # and must not be serialised as the string "None".
            params["maxDaysSinceAdded"] = str(max_days_since_added)
        if mustNewHome:
            params["mustHave"] = "newHome"

    if channel is ListingType.RENT and furnish_types:
        params["furnishTypes"] = ",".join(furnish_types)

    async def do_request(s: aiohttp.ClientSession) -> dict[str, Any]:
        async with s.get(
            _LISTING_URL,
            params=params,
            headers=_LISTING_HEADERS,
        ) as response:
            if response.status != 200:
                raise Exception(f"Failed due to: {await response.text()}")
            return await response.json()

    if session is not None:
        return await do_request(session)
    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await do_request(new_session)


@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3))
async def probe_query(
    *,
    session: aiohttp.ClientSession,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,
    max_days_since_added: int | None = 30,
    furnish_types: list[FurnishType] | None = None,
) -> dict[str, Any]:
    """Probe the API to get result count without fetching full results.

    Makes a minimal request (page_size=1) to efficiently get
    totalAvailableResults.

    Args:
        session: aiohttp session for making requests.
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: Key into ``districts.get_districts()``; the mapped value is
            sent as ``locationIdentifier``.
        max_days_since_added: Maximum days since listing was added (BUY only).
            Sent only when it is one of 1, 3, 7 or 14; any other value
            (including the default 30 and None) omits the filter.
        furnish_types: List of furnish types to filter (RENT only).

    Returns:
        API response containing totalAvailableResults.

    Raises:
        Exception: If the API answers with a non-200 status. The call is
            attempted up to three times by the ``retry`` decorator.
    """
    params = _base_search_params(
        district=district,
        channel=channel,
        page=1,
        page_size=1,  # Minimal page size for probing
        radius=radius,
        min_price=min_price,
        max_price=max_price,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
    )

    if channel is ListingType.BUY:
        params["dontShow"] = "sharedOwnership,retirement"
        # Unlike listing_query, an unsupported value is dropped silently
        # rather than rejected.
        if (
            max_days_since_added is not None
            and max_days_since_added in _ALLOWED_MAX_DAYS
        ):
            params["maxDaysSinceAdded"] = str(max_days_since_added)

    if channel is ListingType.RENT and furnish_types:
        params["furnishTypes"] = ",".join(furnish_types)

    async with session.get(
        _LISTING_URL,
        params=params,
        headers=_LISTING_HEADERS,
    ) as response:
        if response.status != 200:
            raise Exception(f"Probe failed: {await response.text()}")
        return await response.json()