Add a required --type/-t CLI option to dump-listings that selects the listing type (BUY or RENT), so properties to rent can be queried

example:
python main.py --data-dir data/rs2 dump-listings --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
This commit is contained in:
Viktor Barzin 2025-05-17 21:22:39 +00:00
parent bb9afc76fe
commit df24c2c1b7
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 44 additions and 18 deletions

View file

@ -1,12 +1,13 @@
from dataclasses import dataclass
import pathlib
from rec.query import listing_query
from rec.query import ListingType, listing_query
from rec.districts import get_districts
from data_access import Listing
@dataclass(frozen=True)
class QueryParameters:
listing_type: ListingType
min_bedrooms: int
max_bedrooms: int
min_price: int
@ -32,8 +33,9 @@ def dump_listings(
print("#### District:", district)
for i in [1, 2]:
try:
d = listing_query(
response_json = listing_query(
page=i,
channel=parameters.listing_type,
min_bedrooms=parameters.min_bedrooms,
max_bedrooms=parameters.max_bedrooms,
radius=parameters.radius,
@ -47,12 +49,12 @@ def dump_listings(
print(e)
break
if i == 1:
print("totalAvailableResults: ", d["totalAvailableResults"])
if len(d["properties"]) == 0:
print("totalAvailableResults: ", response_json["totalAvailableResults"])
if len(response_json["properties"]) == 0:
break
print(f"page {i}", end=", ", flush=True)
for property in d["properties"]:
for property in response_json["properties"]:
identifier = property["identifier"]
listing = Listing(identifier, data_dir=data_dir)

View file

@ -5,6 +5,7 @@ import importlib
from rec.districts import get_districts
from data_access import Listing
import csv_exporter
from rec.query import ListingType
dump_listings_module = importlib.import_module('1_dump_listings')
dump_detail_module = importlib.import_module('2_dump_detail')
@ -41,6 +42,16 @@ def cli(ctx, data_dir: str):
@cli.command()
@click.option(
'--type',
'-t',
help='Type of listing to scrape',
type=click.Choice(
ListingType.__members__.keys(),
case_sensitive=False,
),
required=True,
)
@click.option(
'--min-bedrooms',
default=1,
@ -80,9 +91,11 @@ def dump_listings(
max_bedrooms: int,
min_price: int,
max_price: int,
type: str,
):
data_dir: str = ctx.obj['data_dir']
query_parameters = dump_listings_module.QueryParameters(
listing_type=ListingType[type],
district_names=set(district),
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,

View file

@ -6,7 +6,11 @@ import urllib3
urllib3.disable_warnings()
# cache = Cache(r"_cache")
class ListingType(enum.StrEnum):
BUY = "BUY"
RENT = "RENT"
headers = {
"Host": "api.rightmove.co.uk",
@ -42,9 +46,9 @@ def detail_query(detail_id: int):
return response.json()
# @cache.memoize()
def listing_query(
page: int,
channel: ListingType,
min_bedrooms: int,
max_bedrooms: int,
radius: float,
@ -58,29 +62,38 @@ def listing_query(
) -> dict:
params = {
"locationIdentifier": location_id,
"channel": "BUY",
"channel": channel.upper(),
"page": str(page),
"numberOfPropertiesPerPage": str(page_size),
"radius": str(radius),
"sortBy": "distance",
"includeUnavailableProperties": "false",
"dontShow": "sharedOwnership,retirement",
"minPrice": str(min_price),
"maxPrice": str(max_price),
"minBedrooms": str(min_bedrooms),
"maxBedrooms": str(max_bedrooms),
"apiApplication": "ANDROID",
"appVersion": "3.70.0",
"appVersion": "4.28.0",
}
if len(property_type) > 0:
params["propertyTypes"] = ",".join(property_type)
if max_days_since_added:
if max_days_since_added not in [1, 3, 7, 14]:
if channel is ListingType.BUY:
params["dontShow"] = "sharedOwnership,retirement",
if len(property_type) > 0:
params["propertyTypes"] = ",".join(property_type)
if max_days_since_added is not None and max_days_since_added not in [
1, 3, 7, 14
]:
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
params["maxDaysSinceAdded"] = max_days_since_added
if mustNewHome:
params["mustHave"] = "newHome"
if mustNewHome:
params["mustHave"] = "newHome"
headers = {
"Host": "api.rightmove.co.uk",
"Accept-Encoding": "gzip, deflate, br",
"User-Agent": "okhttp/4.12.0",
"Connection": "keep-alive"
}
response = requests.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
@ -91,5 +104,3 @@ def listing_query(
raise Exception("Failed due to: ", response.text)
return response.json()