# Listing metadata: 281 lines, 8.8 KiB, Python
import enum
|
|
from typing import Any
|
|
from contextlib import asynccontextmanager
|
|
from collections.abc import AsyncIterator
|
|
|
|
import aiohttp
|
|
from models.listing import FurnishType, ListingType
|
|
from rec import districts
|
|
from tenacity import retry, stop_after_attempt, wait_random
|
|
from config.scraper_config import ScraperConfig
|
|
|
|
|
|
# Headers attached to sessions built by create_session() and to every
# detail_query() request. Key order is kept as written.
DEFAULT_HEADERS: dict[str, str] = {
    "Host": "api.rightmove.co.uk",
    "User-Agent": "okhttp/4.12.0",
    "Connection": "keep-alive",
}
|
|
|
|
|
|
class PropertyType(enum.StrEnum):
|
|
BUNGALOW = "bungalow"
|
|
DETACHED = "detached"
|
|
FLAT = "flat"
|
|
LAND = "land"
|
|
PARK_HOME = "park-home"
|
|
SEMI_DETACHED = "semi-detached"
|
|
TERRACED = "terraced"
|
|
|
|
|
|
@asynccontextmanager
async def create_session(
    config: ScraperConfig | None = None,
) -> AsyncIterator[aiohttp.ClientSession]:
    """Create an aiohttp session with optional proxy support.

    The session is created with ``trust_env=True`` and ``DEFAULT_HEADERS``
    and is closed when the ``async with`` block using this context manager
    exits, whether normally or through an exception.

    Args:
        config: Scraper configuration. Loaded with
            ``ScraperConfig.from_env()`` if not provided.

    Yields:
        Configured aiohttp ClientSession.

    Raises:
        ImportError: If ``config.proxy_url`` is set but ``aiohttp-socks``
            cannot be imported.
    """
    if config is None:
        config = ScraperConfig.from_env()

    connector = None
    if config.proxy_url:
        # Imported lazily so aiohttp-socks is only required when a proxy is
        # actually configured.
        try:
            from aiohttp_socks import ProxyConnector
        except ImportError as err:
            raise ImportError(
                "aiohttp-socks is required for proxy support. "
                "Install with: pip install aiohttp-socks"
            ) from err

        # Built outside the try block so that errors raised while creating
        # the connector are not reported as a missing package.
        connector = ProxyConnector.from_url(config.proxy_url)

    async with aiohttp.ClientSession(
        trust_env=True,
        connector=connector,
        headers=DEFAULT_HEADERS,
    ) as session:
        yield session
|
|
|
|
|
|
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def detail_query(
    detail_id: int,
    session: aiohttp.ClientSession | None = None,
) -> dict[str, Any]:
    """Fetch detailed property information.

    The call is wrapped by tenacity's ``retry``: it stops after 3 attempts
    and waits a random 1-2 seconds between them.

    Args:
        detail_id: The property identifier, interpolated into the request URL.
        session: Optional aiohttp session. If omitted, a temporary
            ``aiohttp.ClientSession(trust_env=True)`` is created for this
            single request and closed afterwards.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: Raised inside the retried call when the response status
            is not 200; the message includes the id, the status code and
            the response body.
    """
    params = {
        "apiApplication": "ANDROID",
        "appVersion": "3.70.0",
    }
    url = f"https://api.rightmove.co.uk/api/property/{detail_id}"

    async def fetch(client: aiohttp.ClientSession) -> dict[str, Any]:
        async with client.get(
            url, params=params, headers=DEFAULT_HEADERS
        ) as response:
            if response.status != 200:
                # The trailing space keeps the two message parts from running
                # together ("...Status Code: 500.Failed due to...").
                raise Exception(
                    f"id: {detail_id}. Status Code: {response.status}. "
                    f"Failed due to: {await response.text()}"
                )
            return await response.json()

    # Compare against None explicitly: the argument is optional, and its
    # presence should not depend on the session object's truthiness.
    if session is not None:
        return await fetch(session)

    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await fetch(new_session)
|
|
|
|
|
|
@retry(wait=wait_random(min=1, max=60), stop=stop_after_attempt(3))
async def listing_query(
    *,
    page: int,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,
    mustNewHome: bool = False,
    max_days_since_added: int | None = 30,
    property_type: list[PropertyType] = [],  # never mutated here
    page_size: int = 25,
    furnish_types: list[FurnishType] = [],  # never mutated here
    session: aiohttp.ClientSession | None = None,
) -> dict[str, Any]:
    """Execute a listing search query.

    The call is wrapped by tenacity's ``retry``: it stops after 3 attempts
    and waits a random 1-60 seconds between them. Argument validation runs
    inside the retried body.

    Args:
        page: Page number to fetch, sent as the ``page`` parameter.
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: Key into ``districts.get_districts()``; the mapped value is
            sent as ``locationIdentifier``.
        mustNewHome: Filter for new homes only (BUY only).
        max_days_since_added: Maximum days since listing was added (BUY
            only). For BUY it must be one of 1, 3, 7 or 14, or ``None`` to
            omit the filter. NOTE: the default of 30 is not an accepted
            value, so BUY callers must pass this argument explicitly.
        property_type: List of property types to filter (BUY only).
        page_size: Number of results per page (default 25).
        furnish_types: List of furnish types to filter (RENT only).
        session: Optional aiohttp session. If omitted, a temporary
            ``aiohttp.ClientSession(trust_env=True)`` is created for this
            single request and closed afterwards.

    Returns:
        API response as a dictionary.

    Raises:
        KeyError: If ``district`` is not a key of
            ``districts.get_districts()``.
        ValueError: If ``channel`` is BUY and ``max_days_since_added`` is
            neither ``None`` nor one of 1, 3, 7, 14.
        Exception: If the response status is not 200.
    """
    allowed_max_days = (1, 3, 7, 14)

    params: dict[str, str] = {
        "locationIdentifier": districts.get_districts()[district],
        "channel": str(channel).upper(),
        "page": str(page),
        "numberOfPropertiesPerPage": str(page_size),
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "4.28.0",
    }

    if channel is ListingType.BUY:
        params["dontShow"] = "sharedOwnership,retirement"
        if property_type:
            params["propertyTypes"] = ",".join(property_type)
        # None means "no age filter": omit the parameter instead of sending
        # the literal string "None".
        if max_days_since_added is not None:
            if max_days_since_added not in allowed_max_days:
                raise ValueError(
                    f"Invalid max_days_since_added: {max_days_since_added}. "
                    "Allowed values: 1, 3, 7, 14."
                )
            params["maxDaysSinceAdded"] = str(max_days_since_added)
        if mustNewHome:
            params["mustHave"] = "newHome"

    if channel is ListingType.RENT and furnish_types:
        params["furnishTypes"] = ",".join(furnish_types)

    # Written out in full rather than derived from DEFAULT_HEADERS so the
    # header order sent with the request stays exactly as before.
    request_headers = {
        "Host": "api.rightmove.co.uk",
        "Accept-Encoding": "gzip, deflate, br",
        "User-Agent": "okhttp/4.12.0",
        "Connection": "keep-alive",
    }

    async def fetch(client: aiohttp.ClientSession) -> dict[str, Any]:
        async with client.get(
            "https://api.rightmove.co.uk/api/property-listing",
            params=params,
            headers=request_headers,
        ) as response:
            if response.status != 200:
                raise Exception(f"Failed due to: {await response.text()}")
            return await response.json()

    # Compare against None explicitly: the argument is optional, and its
    # presence should not depend on the session object's truthiness.
    if session is not None:
        return await fetch(session)

    async with aiohttp.ClientSession(trust_env=True) as new_session:
        return await fetch(new_session)
|
|
|
|
|
|
@retry(wait=wait_random(min=1, max=10), stop=stop_after_attempt(3))
async def probe_query(
    *,
    session: aiohttp.ClientSession,
    channel: ListingType,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    district: str,
    max_days_since_added: int = 30,
    furnish_types: list[FurnishType] = [],
) -> dict[str, Any]:
    """Ask the listing endpoint for a single result to learn the total count.

    Sends the same search as a listing query but pinned to page 1 with a
    page size of 1, so the response stays small while still carrying
    totalAvailableResults.

    Args:
        session: aiohttp session used for the request.
        channel: Listing type (BUY or RENT).
        min_bedrooms: Minimum number of bedrooms.
        max_bedrooms: Maximum number of bedrooms.
        radius: Search radius.
        min_price: Minimum price.
        max_price: Maximum price.
        district: Key into ``districts.get_districts()``.
        max_days_since_added: Maximum days since listing was added (BUY
            only). Sent only when it is one of 1, 3, 7 or 14; any other
            value is left out of the request without raising.
        furnish_types: List of furnish types to filter (RENT only).

    Returns:
        API response containing totalAvailableResults.
    """
    location_id = districts.get_districts()[district]
    query: dict[str, str] = {
        "locationIdentifier": location_id,
        "channel": str(channel).upper(),
        "page": "1",
        # One property per page is enough when only the total is wanted.
        "numberOfPropertiesPerPage": "1",
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "4.28.0",
    }

    if channel is ListingType.BUY:
        query["dontShow"] = "sharedOwnership,retirement"
        # None is never a member of the tuple, so no separate None check
        # is needed here.
        if max_days_since_added in (1, 3, 7, 14):
            query["maxDaysSinceAdded"] = str(max_days_since_added)

    if channel is ListingType.RENT and furnish_types:
        query["furnishTypes"] = ",".join(furnish_types)

    probe_headers = {
        "Host": "api.rightmove.co.uk",
        "Accept-Encoding": "gzip, deflate, br",
        "User-Agent": "okhttp/4.12.0",
        "Connection": "keep-alive",
    }
    endpoint = "https://api.rightmove.co.uk/api/property-listing"

    async with session.get(
        endpoint, params=query, headers=probe_headers
    ) as reply:
        if reply.status == 200:
            return await reply.json()
        raise Exception(f"Probe failed: {await reply.text()}")
|