wrongmove/crawler/rec/query.py
Kadir 508aa02812 Real crawling scripts and floorplan detection
1. get all listings
2. get all detail jsons
3. get all images
4. get all floorplans
5. detecting floorplans

Also updating dependencies for huggingface etc.
2024-03-10 18:49:39 +00:00

76 lines
2.1 KiB
Python

# from diskcache import Cache
import requests
# from rec.db import RightmoveListing
import urllib3
urllib3.disable_warnings()
# cache = Cache(r"_cache")
# HTTP headers passed to the requests calls in this module.
# NOTE(review): the okhttp User-Agent presumably mirrors the Android client
# named by the "apiApplication" query parameter -- confirm before changing.
headers = {
    "Host": "api.rightmove.co.uk",
    # Left disabled in the original:
    # 'Accept-Encoding': 'gzip, deflate, br',
    "User-Agent": "okhttp/4.10.0",
    "Connection": "close",
}
def detail_query(detail_id: int, *, timeout: float = 30.0):
    """Fetch the detail JSON for one property from the Rightmove API.

    Args:
        detail_id: Identifier interpolated into the ``/api/property/{id}`` URL.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    params = {
        "apiApplication": "ANDROID",
        "appVersion": "3.70.0",
    }
    url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as in the original; confirm this is intentional.
    response = requests.get(
        url,
        params=params,
        headers=headers,
        verify=False,
        timeout=timeout,
    )
    if response.status_code != 200:
        # A single formatted message; passing two positional arguments to
        # Exception would render the error as a tuple.
        raise Exception(
            f"Failed due to: HTTP {response.status_code}: {response.text}"
        )
    return response.json()
# @cache.memoize()
def listing_query(
    page: int,
    min_bedrooms: int,
    max_bedrooms: int,
    radius: float,
    min_price: int,
    max_price: int,
    *,
    location_identifier: str = "POSTCODE^4228216",
    timeout: float = 30.0,
) -> dict:
    """Fetch one page of property listings from the Rightmove API.

    The search is fixed to the ``BUY`` channel, 25 results per page, sorted
    by distance, property type ``flat``, with shared-ownership and retirement
    listings excluded and unavailable properties not included.

    Args:
        page: Page number, sent as the ``page`` query parameter.
        min_bedrooms: Sent as ``minBedrooms``.
        max_bedrooms: Sent as ``maxBedrooms``.
        radius: Sent as ``radius``.
        min_price: Sent as ``minPrice``.
        max_price: Sent as ``maxPrice``.
        location_identifier: Sent as ``locationIdentifier``. Defaults to the
            value that was previously hard-coded.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    params = {
        "locationIdentifier": location_identifier,
        "channel": "BUY",
        "page": str(page),
        "numberOfPropertiesPerPage": "25",
        "radius": str(radius),
        "sortBy": "distance",
        "includeUnavailableProperties": "false",
        "propertyTypes": "flat",
        "dontShow": "sharedOwnership,retirement",
        "minPrice": str(min_price),
        "maxPrice": str(max_price),
        "minBedrooms": str(min_bedrooms),
        "maxBedrooms": str(max_bedrooms),
        "apiApplication": "ANDROID",
        "appVersion": "3.70.0",
    }
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as in the original; confirm this is intentional.
    response = requests.get(
        "https://api.rightmove.co.uk/api/property-listing",
        params=params,
        headers=headers,
        verify=False,
        timeout=timeout,
    )
    if response.status_code != 200:
        # A single formatted message; passing two positional arguments to
        # Exception would render the error as a tuple.
        raise Exception(
            f"Failed due to: HTTP {response.status_code}: {response.text}"
        )
    return response.json()
if __name__ == "__main__":
    # Manual smoke run: fetch the first page of listings and save each one.
    # RightmoveListing is imported here because the module-level import is
    # commented out; without it the loop below raises NameError.
    # NOTE(review): import path taken from that commented-out line -- confirm
    # that rec.db still exposes RightmoveListing.
    from rec.db import RightmoveListing

    listing_page = listing_query(
        page=1,
        min_bedrooms=2,
        max_bedrooms=2,
        radius=5.0,
        min_price=150000,
        max_price=700000,
    )
    for prop in listing_page["properties"]:
        rl = RightmoveListing(
            id=prop["identifier"],
            listing_json=prop,
            price=prop["price"],
            updated_timestamp=prop["updateDate"],
            lat=prop["latitude"],
            lon=prop["longitude"],
        )
        rl.save()