add cli param for querying properties to rent
example: python main.py --data-dir data/rs2 dump-listings --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
This commit is contained in:
parent
bb9afc76fe
commit
df24c2c1b7
3 changed files with 44 additions and 18 deletions
|
|
@ -1,12 +1,13 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import pathlib
|
import pathlib
|
||||||
from rec.query import listing_query
|
from rec.query import ListingType, listing_query
|
||||||
from rec.districts import get_districts
|
from rec.districts import get_districts
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class QueryParameters:
|
class QueryParameters:
|
||||||
|
listing_type: ListingType
|
||||||
min_bedrooms: int
|
min_bedrooms: int
|
||||||
max_bedrooms: int
|
max_bedrooms: int
|
||||||
min_price: int
|
min_price: int
|
||||||
|
|
@ -32,8 +33,9 @@ def dump_listings(
|
||||||
print("#### District:", district)
|
print("#### District:", district)
|
||||||
for i in [1, 2]:
|
for i in [1, 2]:
|
||||||
try:
|
try:
|
||||||
d = listing_query(
|
response_json = listing_query(
|
||||||
page=i,
|
page=i,
|
||||||
|
channel=parameters.listing_type,
|
||||||
min_bedrooms=parameters.min_bedrooms,
|
min_bedrooms=parameters.min_bedrooms,
|
||||||
max_bedrooms=parameters.max_bedrooms,
|
max_bedrooms=parameters.max_bedrooms,
|
||||||
radius=parameters.radius,
|
radius=parameters.radius,
|
||||||
|
|
@ -47,12 +49,12 @@ def dump_listings(
|
||||||
print(e)
|
print(e)
|
||||||
break
|
break
|
||||||
if i == 1:
|
if i == 1:
|
||||||
print("totalAvailableResults: ", d["totalAvailableResults"])
|
print("totalAvailableResults: ", response_json["totalAvailableResults"])
|
||||||
if len(d["properties"]) == 0:
|
if len(response_json["properties"]) == 0:
|
||||||
break
|
break
|
||||||
print(f"page {i}", end=", ", flush=True)
|
print(f"page {i}", end=", ", flush=True)
|
||||||
|
|
||||||
for property in d["properties"]:
|
for property in response_json["properties"]:
|
||||||
identifier = property["identifier"]
|
identifier = property["identifier"]
|
||||||
|
|
||||||
listing = Listing(identifier, data_dir=data_dir)
|
listing = Listing(identifier, data_dir=data_dir)
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import importlib
|
||||||
from rec.districts import get_districts
|
from rec.districts import get_districts
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
import csv_exporter
|
import csv_exporter
|
||||||
|
from rec.query import ListingType
|
||||||
|
|
||||||
dump_listings_module = importlib.import_module('1_dump_listings')
|
dump_listings_module = importlib.import_module('1_dump_listings')
|
||||||
dump_detail_module = importlib.import_module('2_dump_detail')
|
dump_detail_module = importlib.import_module('2_dump_detail')
|
||||||
|
|
@ -41,6 +42,16 @@ def cli(ctx, data_dir: str):
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
@click.option(
|
||||||
|
'--type',
|
||||||
|
'-t',
|
||||||
|
help='Type of listing to scrape',
|
||||||
|
type=click.Choice(
|
||||||
|
ListingType.__members__.keys(),
|
||||||
|
case_sensitive=False,
|
||||||
|
),
|
||||||
|
required=True,
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
'--min-bedrooms',
|
'--min-bedrooms',
|
||||||
default=1,
|
default=1,
|
||||||
|
|
@ -80,9 +91,11 @@ def dump_listings(
|
||||||
max_bedrooms: int,
|
max_bedrooms: int,
|
||||||
min_price: int,
|
min_price: int,
|
||||||
max_price: int,
|
max_price: int,
|
||||||
|
type: str,
|
||||||
):
|
):
|
||||||
data_dir: str = ctx.obj['data_dir']
|
data_dir: str = ctx.obj['data_dir']
|
||||||
query_parameters = dump_listings_module.QueryParameters(
|
query_parameters = dump_listings_module.QueryParameters(
|
||||||
|
listing_type=ListingType[type],
|
||||||
district_names=set(district),
|
district_names=set(district),
|
||||||
min_bedrooms=min_bedrooms,
|
min_bedrooms=min_bedrooms,
|
||||||
max_bedrooms=max_bedrooms,
|
max_bedrooms=max_bedrooms,
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,11 @@ import urllib3
|
||||||
|
|
||||||
urllib3.disable_warnings()
|
urllib3.disable_warnings()
|
||||||
|
|
||||||
# cache = Cache(r"_cache")
|
|
||||||
|
class ListingType(enum.StrEnum):
|
||||||
|
BUY = "BUY"
|
||||||
|
RENT = "RENT"
|
||||||
|
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Host": "api.rightmove.co.uk",
|
"Host": "api.rightmove.co.uk",
|
||||||
|
|
@ -42,9 +46,9 @@ def detail_query(detail_id: int):
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
# @cache.memoize()
|
|
||||||
def listing_query(
|
def listing_query(
|
||||||
page: int,
|
page: int,
|
||||||
|
channel: ListingType,
|
||||||
min_bedrooms: int,
|
min_bedrooms: int,
|
||||||
max_bedrooms: int,
|
max_bedrooms: int,
|
||||||
radius: float,
|
radius: float,
|
||||||
|
|
@ -58,29 +62,38 @@ def listing_query(
|
||||||
) -> dict:
|
) -> dict:
|
||||||
params = {
|
params = {
|
||||||
"locationIdentifier": location_id,
|
"locationIdentifier": location_id,
|
||||||
"channel": "BUY",
|
"channel": channel.upper(),
|
||||||
"page": str(page),
|
"page": str(page),
|
||||||
"numberOfPropertiesPerPage": str(page_size),
|
"numberOfPropertiesPerPage": str(page_size),
|
||||||
"radius": str(radius),
|
"radius": str(radius),
|
||||||
"sortBy": "distance",
|
"sortBy": "distance",
|
||||||
"includeUnavailableProperties": "false",
|
"includeUnavailableProperties": "false",
|
||||||
"dontShow": "sharedOwnership,retirement",
|
|
||||||
"minPrice": str(min_price),
|
"minPrice": str(min_price),
|
||||||
"maxPrice": str(max_price),
|
"maxPrice": str(max_price),
|
||||||
"minBedrooms": str(min_bedrooms),
|
"minBedrooms": str(min_bedrooms),
|
||||||
"maxBedrooms": str(max_bedrooms),
|
"maxBedrooms": str(max_bedrooms),
|
||||||
"apiApplication": "ANDROID",
|
"apiApplication": "ANDROID",
|
||||||
"appVersion": "3.70.0",
|
"appVersion": "4.28.0",
|
||||||
}
|
}
|
||||||
if len(property_type) > 0:
|
if channel is ListingType.BUY:
|
||||||
params["propertyTypes"] = ",".join(property_type)
|
params["dontShow"] = "sharedOwnership,retirement"
|
||||||
if max_days_since_added:
|
if len(property_type) > 0:
|
||||||
if max_days_since_added not in [1, 3, 7, 14]:
|
params["propertyTypes"] = ",".join(property_type)
|
||||||
|
if max_days_since_added is not None and max_days_since_added not in [
|
||||||
|
1, 3, 7, 14
|
||||||
|
]:
|
||||||
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
|
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
|
||||||
params["maxDaysSinceAdded"] = max_days_since_added
|
params["maxDaysSinceAdded"] = max_days_since_added
|
||||||
|
|
||||||
if mustNewHome:
|
if mustNewHome:
|
||||||
params["mustHave"] = "newHome"
|
params["mustHave"] = "newHome"
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"Host": "api.rightmove.co.uk",
|
||||||
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
|
"User-Agent": "okhttp/4.12.0",
|
||||||
|
"Connection": "keep-alive"
|
||||||
|
}
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
"https://api.rightmove.co.uk/api/property-listing",
|
"https://api.rightmove.co.uk/api/property-listing",
|
||||||
params=params,
|
params=params,
|
||||||
|
|
@ -91,5 +104,3 @@ def listing_query(
|
||||||
raise Exception("Failed due to: ", response.text)
|
raise Exception("Failed due to: ", response.text)
|
||||||
|
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue