ruff format

This commit is contained in:
Kadir 2024-03-25 20:48:48 +00:00
parent 37e3e8ad6f
commit d777558b34
17 changed files with 411 additions and 368 deletions

View file

@ -13,9 +13,9 @@ for i in range(1, 10000):
except: except:
break break
for property in d['properties']: for property in d["properties"]:
identifier = property['identifier'] identifier = property["identifier"]
listing = Listing(identifier) listing = Listing(identifier)
with open(listing.path_listing_json(), 'w') as f: with open(listing.path_listing_json(), "w") as f:
json.dump(property, f) json.dump(property, f)

View file

@ -10,8 +10,8 @@ for listing in tqdm(Listing.get_all_listings()):
try: try:
d = detail_query(listing.identifier) d = detail_query(listing.identifier)
with open(listing.path_detail_json(), 'w') as f: with open(listing.path_detail_json(), "w") as f:
json.dump(d, f) json.dump(d, f)
except: except:
print('Failed at: ', listing.identifier) print("Failed at: ", listing.identifier)
raise raise

View file

@ -7,25 +7,22 @@ for listing in tqdm(Listing.get_all_listings()):
with open(listing.path_detail_json()) as f: with open(listing.path_detail_json()) as f:
detail = json.load(f) detail = json.load(f)
for photo in detail["property"]["photos"]:
for photo in detail['property']['photos']: url = photo["maxSizeUrl"]
url = photo['maxSizeUrl'] picname = url.split("/")[-1]
picname = url.split('/')[-1] order = photo["order"]
order = photo['order']
p = listing.path_pic_file(order, picname) p = listing.path_pic_file(order, picname)
if p.exists(): if p.exists():
continue continue
tqdm.write(str(p)) tqdm.write(str(p))
urlretrieve(url, p) urlretrieve(url, p)
for photo in detail['property']['floorplans']: for photo in detail["property"]["floorplans"]:
url = photo['url'] url = photo["url"]
picname = url.split('/')[-1] picname = url.split("/")[-1]
order = photo['order'] order = photo["order"]
p = listing.path_floorplan_file(order, picname) p = listing.path_floorplan_file(order, picname)
if p.exists(): if p.exists():
continue continue
tqdm.write(str(p)) tqdm.write(str(p))
urlretrieve(url, p) urlretrieve(url, p)

View file

@ -8,6 +8,6 @@ for listing in tqdm(listings):
lat, long = BROCK_STREET_LAT_LONG lat, long = BROCK_STREET_LAT_LONG
listing.calculate_route(lat, long, recalculate=False) listing.calculate_route(lat, long, recalculate=False)
traveltime = listing.travel_time[0] traveltime = listing.travel_time[0]
duration_minutes = traveltime['duration'] / 60. duration_minutes = traveltime["duration"] / 60.0
tqdm.write(f"{listing.identifier} {duration_minutes}") tqdm.write(f"{listing.identifier} {duration_minutes}")

View file

@ -9,7 +9,7 @@ for listing in tqdm(list(Listing.get_all_listings())):
floorplans = json.load(f) floorplans = json.load(f)
for floorplan in floorplans: for floorplan in floorplans:
floorplan['estimated_sqm'] = extract_total_sqm(floorplan['text']) floorplan["estimated_sqm"] = extract_total_sqm(floorplan["text"])
with open(listing.path_floorplan_ocr_json(), 'w') as f: with open(listing.path_floorplan_ocr_json(), "w") as f:
floorplans = json.dump(floorplans, f) floorplans = json.dump(floorplans, f)

View file

@ -5,21 +5,22 @@ from typing import List, Dict
from rec import floorplan, routing from rec import floorplan, routing
import re import re
_DATA_DIR = pathlib.Path('data/rs/') _DATA_DIR = pathlib.Path("data/rs/")
@dataclass() @dataclass()
class Listing(): class Listing:
identifier: int identifier: int
_cached: Dict = None _cached: Dict = None
@staticmethod @staticmethod
def get_all_listings() -> List['Listing']: def get_all_listings() -> List["Listing"]:
listing_paths = sorted(list(_DATA_DIR.glob('*/listing.json'))) listing_paths = sorted(list(_DATA_DIR.glob("*/listing.json")))
identifiers = [] identifiers = []
for listing_path in listing_paths: for listing_path in listing_paths:
with open(listing_path) as f: with open(listing_path) as f:
d = json.load(f) d = json.load(f)
identifiers.append(Listing(d['identifier'])) identifiers.append(Listing(d["identifier"]))
return identifiers return identifiers
@ -29,51 +30,57 @@ class Listing():
return p return p
def path_listing_json(self) -> pathlib.Path: def path_listing_json(self) -> pathlib.Path:
return self.path_listing() / 'listing.json' return self.path_listing() / "listing.json"
def path_detail_json(self) -> pathlib.Path: def path_detail_json(self) -> pathlib.Path:
return self.path_listing() / 'detail.json' return self.path_listing() / "detail.json"
def path_routing_json(self) -> pathlib.Path: def path_routing_json(self) -> pathlib.Path:
return self.path_listing() / 'routing.json' return self.path_listing() / "routing.json"
def path_floorplan_model_json(self) -> pathlib.Path: def path_floorplan_model_json(self) -> pathlib.Path:
return self.path_listing() / 'floorplan_model.json' return self.path_listing() / "floorplan_model.json"
def path_floorplan_ocr_json(self) -> pathlib.Path: def path_floorplan_ocr_json(self) -> pathlib.Path:
return self.path_listing() / 'floorplan_ocr.json' return self.path_listing() / "floorplan_ocr.json"
def path_pic_folder(self) -> pathlib.Path: def path_pic_folder(self) -> pathlib.Path:
return self.path_listing() / 'pics' return self.path_listing() / "pics"
def path_pic_file(self, order, name) -> pathlib.Path: def path_pic_file(self, order, name) -> pathlib.Path:
self.path_pic_folder().mkdir(parents=True, exist_ok=True) self.path_pic_folder().mkdir(parents=True, exist_ok=True)
return self.path_pic_folder() / f'{order}_{name}' return self.path_pic_folder() / f"{order}_{name}"
def path_floorplan_folder(self) -> pathlib.Path: def path_floorplan_folder(self) -> pathlib.Path:
return self.path_listing() / 'floorplans' return self.path_listing() / "floorplans"
def path_floorplan_file(self, order, name) -> pathlib.Path: def path_floorplan_file(self, order, name) -> pathlib.Path:
self.path_floorplan_folder().mkdir(parents=True, exist_ok=True) self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
return self.path_floorplan_folder() / f'{order}_{name}' return self.path_floorplan_folder() / f"{order}_{name}"
def list_floorplans(self): def list_floorplans(self):
images = list(self.path_floorplan_folder().glob('*')) images = list(self.path_floorplan_folder().glob("*"))
# todo add check if return is image # todo add check if return is image
return images return images
def calculate_sqm_model(self): def calculate_sqm_model(self):
objs = [] objs = []
for floorplan_path in self.list_floorplans(): for floorplan_path in self.list_floorplans():
estimated_sqm, model_output, predictions = floorplan.calculate_model(floorplan_path) estimated_sqm, model_output, predictions = floorplan.calculate_model(
objs.append({ floorplan_path
'floorplan_path': str(floorplan_path), )
'estimated_sqm': estimated_sqm, objs.append(
'model_output': model_output, {
'no_predictions': len(predictions) # cant serialize the predictions itself since its a tensor "floorplan_path": str(floorplan_path),
}) "estimated_sqm": estimated_sqm,
"model_output": model_output,
"no_predictions": len(
predictions
), # cant serialize the predictions itself since its a tensor
}
)
with open(self.path_floorplan_model_json(), 'w') as f: with open(self.path_floorplan_model_json(), "w") as f:
json.dump(objs, f) json.dump(objs, f)
@property @property
@ -84,7 +91,9 @@ class Listing():
with open(self.path_floorplan_json()) as f: with open(self.path_floorplan_json()) as f:
objs = json.load(f) objs = json.load(f)
max_sqm = max([o['estimated_sqm'] for o in objs if o is None]) # filter out Nones max_sqm = max(
[o["estimated_sqm"] for o in objs if o is None]
) # filter out Nones
return max_sqm return max_sqm
def calculate_sqm_ocr(self, recalculate=True): def calculate_sqm_ocr(self, recalculate=True):
@ -94,13 +103,15 @@ class Listing():
objs = [] objs = []
for floorplan_path in self.list_floorplans(): for floorplan_path in self.list_floorplans():
estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path) estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
objs.append({ objs.append(
'floorplan_path': str(floorplan_path), {
'estimated_sqm': estimated_sqm, "floorplan_path": str(floorplan_path),
'text': model_output, "estimated_sqm": estimated_sqm,
}) "text": model_output,
}
)
with open(self.path_floorplan_ocr_json(), 'w') as f: with open(self.path_floorplan_ocr_json(), "w") as f:
json.dump(objs, f) json.dump(objs, f)
@property @property
@ -111,8 +122,7 @@ class Listing():
with open(self.path_floorplan_ocr_json()) as f: with open(self.path_floorplan_ocr_json()) as f:
objs = json.load(f) objs = json.load(f)
sqms = [o["estimated_sqm"] for o in objs if o["estimated_sqm"] is not None]
sqms = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
if len(sqms) == 0: if len(sqms) == 0:
return None return None
max_sqm = max(sqms) max_sqm = max(sqms)
@ -122,8 +132,10 @@ class Listing():
if self.path_routing_json().exists() and not recalculate: if self.path_routing_json().exists() and not recalculate:
return return
result = routing.transit_route(self.latitude, self.longitude, dest_lat, dest_lon) result = routing.transit_route(
with open(self.path_routing_json(), 'w') as f: self.latitude, self.longitude, dest_lat, dest_lon
)
with open(self.path_routing_json(), "w") as f:
json.dump(result, f) json.dump(result, f)
@property @property
@ -135,10 +147,9 @@ class Listing():
return routing.extract_time(d) return routing.extract_time(d)
@property @property
def url(self): def url(self):
return f'https://www.rightmove.co.uk/properties/{self.identifier}' return f"https://www.rightmove.co.uk/properties/{self.identifier}"
@property @property
def detailobject(self): def detailobject(self):
@ -149,7 +160,7 @@ class Listing():
@property @property
def price(self) -> float: def price(self) -> float:
return self.detailobject['property']['price'] return self.detailobject["property"]["price"]
@property @property
def price_per_sqm(self) -> float: def price_per_sqm(self) -> float:
@ -159,22 +170,22 @@ class Listing():
@property @property
def bedrooms(self) -> int: def bedrooms(self) -> int:
return self.detailobject['property']['bedrooms'] return self.detailobject["property"]["bedrooms"]
@property @property
def latitude(self) -> float: def latitude(self) -> float:
return self.detailobject['property']['latitude'] return self.detailobject["property"]["latitude"]
@property @property
def longitude(self) -> float: def longitude(self) -> float:
return self.detailobject['property']['longitude'] return self.detailobject["property"]["longitude"]
@property @property
def leaseLeft(self) -> int: def leaseLeft(self) -> int:
ds = self.detailobject['property'].get('tenureInfo', {}).get('content', []) ds = self.detailobject["property"].get("tenureInfo", {}).get("content", [])
for d in ds: for d in ds:
if d['type'] == 'lengthOfLease': if d["type"] == "lengthOfLease":
matches = re.findall(r'(\d+\.?\d*)', d['value']) matches = re.findall(r"(\d+\.?\d*)", d["value"])
if len(matches): if len(matches):
return float(matches[0]) return float(matches[0])
return None return None
@ -182,25 +193,25 @@ class Listing():
@property @property
def development(self) -> bool: def development(self) -> bool:
# aka new home # aka new home
return self.detailobject['property']['development'] return self.detailobject["property"]["development"]
def dict_nicely(self): def dict_nicely(self):
return { return {
'identifier': self.identifier, "identifier": self.identifier,
'sqm_ocr': self.sqm_ocr, "sqm_ocr": self.sqm_ocr,
'price': self.price, "price": self.price,
'price_per_sqm': self.price_per_sqm, "price_per_sqm": self.price_per_sqm,
'url': self.url, "url": self.url,
'bedrooms': self.bedrooms, "bedrooms": self.bedrooms,
'travel_time_fastest': self.travel_time[0], "travel_time_fastest": self.travel_time[0],
'travel_time_second': None if len(self.travel_time) < 2 else self.travel_time[1], "travel_time_second": None
'lease_left': self.leaseLeft, if len(self.travel_time) < 2
'development': self.development, else self.travel_time[1],
"lease_left": self.leaseLeft,
"development": self.development,
} }
if __name__ == "__main__":
if __name__ == '__main__':
listings = Listing.get_all_listings() listings = Listing.get_all_listings()
print(listings[0].list_floorplans()) print(listings[0].list_floorplans())

View file

@ -4,29 +4,37 @@ def record():
page = 1 page = 1
listing = listing_query(page, 2, 2, 5, 200000, 500000) listing = listing_query(page, 2, 2, 5, 200000, 500000)
with open(f'/Users/kadir/code/realestate/crawler/code/json/queries/listing{page}.json', 'w') as f: with open(
f"/Users/kadir/code/realestate/crawler/code/json/queries/listing{page}.json",
"w",
) as f:
json.dump(listing, f) json.dump(listing, f)
for prop in listing['properties']: for prop in listing["properties"]:
identifier = prop['identifier'] identifier = prop["identifier"]
resp = detail_query(identifier) resp = detail_query(identifier)
# print(identifier, resp.status_code) # print(identifier, resp.status_code)
with open(f'/Users/kadir/code/realestate/crawler/code/json/queries/detail_{identifier}.json', 'w') as f: with open(
f"/Users/kadir/code/realestate/crawler/code/json/queries/detail_{identifier}.json",
"w",
) as f:
json.dump(resp, f) json.dump(resp, f)
def process(): def process():
import json import json
import pathlib import pathlib
path = pathlib.Path('/Users/kadir/code/realestate/crawler/code/json/queries/')
detailjsons = list(path.glob('detail_*json')) path = pathlib.Path("/Users/kadir/code/realestate/crawler/code/json/queries/")
detailjsons = list(path.glob("detail_*json"))
for file in detailjsons: for file in detailjsons:
with open(file) as f: with open(file) as f:
js = json.load(f) js = json.load(f)
for floorplan in js['property']['floorplans']: for floorplan in js["property"]["floorplans"]:
print(floorplan['url']) print(floorplan["url"])
# record() # record()
process() process()

View file

@ -1,13 +1,13 @@
import requests import requests
headers = { headers = {
'Host': 'media.rightmove.co.uk', "Host": "media.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br', # 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0', "User-Agent": "okhttp/4.10.0",
} }
response = requests.get( response = requests.get(
'https://media.rightmove.co.uk/47k/46001/138680705/46001_32532509_IMG_00_0000.jpeg', "https://media.rightmove.co.uk/47k/46001/138680705/46001_32532509_IMG_00_0000.jpeg",
headers=headers, headers=headers,
verify=False, verify=False,
) )

View file

@ -1,58 +1,68 @@
import requests import requests
headers = { headers = {
'Host': 'api.rightmove.co.uk', "Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br', # 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0', "User-Agent": "okhttp/4.10.0",
'Connection': 'close', "Connection": "close",
} }
params = { params = {
'locationIdentifier': 'POSTCODE^4228216', "locationIdentifier": "POSTCODE^4228216",
'channel': 'BUY', "channel": "BUY",
'page': '1', "page": "1",
'numberOfPropertiesPerPage': '25', "numberOfPropertiesPerPage": "25",
'radius': '3.0', "radius": "3.0",
'sortBy': 'distance', "sortBy": "distance",
'includeUnavailableProperties': 'false', "includeUnavailableProperties": "false",
'propertyTypes': 'flat', "propertyTypes": "flat",
'mustHave': 'newHome', # added manually later "mustHave": "newHome", # added manually later
'dontShow': 'sharedOwnership,retirement', "dontShow": "sharedOwnership,retirement",
'minPrice': '150000', "minPrice": "150000",
'maxPrice': '500000', "maxPrice": "500000",
'minBedrooms': '2', "minBedrooms": "2",
'maxBedrooms': '2', "maxBedrooms": "2",
'apiApplication': 'ANDROID', "apiApplication": "ANDROID",
'appVersion': '3.70.0', "appVersion": "3.70.0",
} }
response = requests.get('https://api.rightmove.co.uk/api/property-listing', params=params, headers=headers, verify=False) response = requests.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
headers=headers,
verify=False,
)
import requests import requests
headers = { headers = {
'Host': 'api.rightmove.co.uk', "Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br', # 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0', "User-Agent": "okhttp/4.10.0",
'Connection': 'close', "Connection": "close",
} }
params = { params = {
'locationIdentifier': 'POSTCODE^4228216', "locationIdentifier": "POSTCODE^4228216",
'channel': 'BUY', "channel": "BUY",
'page': '2', "page": "2",
'numberOfPropertiesPerPage': '25', "numberOfPropertiesPerPage": "25",
'radius': '3.0', "radius": "3.0",
'sortBy': 'distance', "sortBy": "distance",
'includeUnavailableProperties': 'false', "includeUnavailableProperties": "false",
'propertyTypes': 'flat', "propertyTypes": "flat",
'dontShow': 'sharedOwnership,retirement', "dontShow": "sharedOwnership,retirement",
'minPrice': '150000', "minPrice": "150000",
'maxPrice': '600000', "maxPrice": "600000",
'minBedrooms': '2', "minBedrooms": "2",
'maxBedrooms': '2', "maxBedrooms": "2",
'apiApplication': 'ANDROID', "apiApplication": "ANDROID",
'appVersion': '3.70.0', "appVersion": "3.70.0",
} }
response = requests.get('https://api.rightmove.co.uk/api/property-listing', params=params, headers=headers, verify=False) response = requests.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
headers=headers,
verify=False,
)

View file

@ -1,16 +1,16 @@
import requests import requests
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8' API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
url = "https://maps.googleapis.com/maps/api/distancematrix/json" url = "https://maps.googleapis.com/maps/api/distancematrix/json"
origin = '51.5636306598907,-0.11061106079085892' origin = "51.5636306598907,-0.11061106079085892"
dest = "51.53836609846008,-0.12743940233824352" dest = "51.53836609846008,-0.12743940233824352"
params = { params = {
"origins": origin, "origins": origin,
"destinations": dest, "destinations": dest,
"key": API_KEY, "key": API_KEY,
"departure_time": "", # timstamp, optional "departure_time": "", # timstamp, optional
"mode": "transit", "mode": "transit",
} }
r = requests.get(url, params=params) r = requests.get(url, params=params)
@ -18,6 +18,5 @@ print(r.status_code)
print(r.json()) print(r.json())
with open('code/json/routing_distancematrix.json', 'w') as f: with open("code/json/routing_distancematrix.json", "w") as f:
f.write(r.text) f.write(r.text)

View file

@ -2,82 +2,76 @@ import requests
from utils import nextMonday from utils import nextMonday
from collections import defaultdict from collections import defaultdict
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8' API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
url = "https://routes.googleapis.com/directions/v2:computeRoutes" url = "https://routes.googleapis.com/directions/v2:computeRoutes"
def travel_time(origin_lat:float, origin_lon:float, dest_lat:float, dest_lon:float): def travel_time(origin_lat: float, origin_lon: float, dest_lat: float, dest_lon: float):
monday9am = nextMonday() monday9am = nextMonday()
header = { header = {
"X-Goog-Api-Key": API_KEY, "X-Goog-Api-Key": API_KEY,
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode", "X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
} }
body = { body = {
"origin":{ "origin": {
"location":{ "location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
"latLng":{ },
"latitude": origin_lat, "destination": {
"longitude": origin_lon "location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
} },
} "travelMode": "TRANSIT",
}, # "2023-10-15T15:01:23.045123456Z"
"destination":{ "departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
"location":{ "computeAlternativeRoutes": False,
"latLng":{ # "routeModifiers": {
"latitude": dest_lat, # "avoidTolls": false,
"longitude": dest_lon # "avoidHighways": false,
} # "avoidFerries": false
} # },
}, "languageCode": "en-US",
"travelMode": "TRANSIT", "units": "METRIC",
# "2023-10-15T15:01:23.045123456Z" }
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
"computeAlternativeRoutes": False,
# "routeModifiers": {
# "avoidTolls": false,
# "avoidHighways": false,
# "avoidFerries": false
# },
"languageCode": "en-US",
"units": "METRIC"
}
r = requests.post(url, json=body, headers=header) r = requests.post(url, json=body, headers=header)
if r.status_code == 200: if r.status_code == 200:
return r.json() return r.json()
raise Exception(r.json())
raise Exception(r.json())
def extract_time(d): def extract_time(d):
r = d['routes'][0] r = d["routes"][0]
print(r.keys()) print(r.keys())
distance = r['distanceMeters'] distance = r["distanceMeters"]
duration = r['duration'] duration = r["duration"]
duration_static = r['staticDuration'] duration_static = r["staticDuration"]
steps = r['legs'][0]['steps'] steps = r["legs"][0]["steps"]
# print(steps) # print(steps)
duration_per_transit = defaultdict(lambda: 0) duration_per_transit = defaultdict(lambda: 0)
distance_per_transit = defaultdict(lambda: 0) distance_per_transit = defaultdict(lambda: 0)
for step in steps: for step in steps:
duration_per_transit[step['travelMode']] += int(step['staticDuration'].strip('s')) duration_per_transit[step["travelMode"]] += int(
distance_per_transit[step['travelMode']] += step.get('distanceMeters', 0) step["staticDuration"].strip("s")
)
distance_per_transit[step["travelMode"]] += step.get("distanceMeters", 0)
print(
print(f"dis {distance}, dur {duration}, duration per transit {dict(duration_per_transit)}, distance per transit {dict(distance_per_transit)}") f"dis {distance}, dur {duration}, duration per transit {dict(duration_per_transit)}, distance per transit {dict(distance_per_transit)}"
)
if __name__ == "__main__": if __name__ == "__main__":
import json import json
with open('code/json/routing_routeapi.json', 'r') as f:
d = json.load(f)
extract_time(d) with open("code/json/routing_routeapi.json", "r") as f:
d = json.load(f)
extract_time(d)
# if __name__ == "__main__": # if __name__ == "__main__":
@ -87,4 +81,3 @@ if __name__ == "__main__":
# import json # import json
# with open('code/json/routing_routeapi.json', 'w') as f: # with open('code/json/routing_routeapi.json', 'w') as f:
# json.dump(d, f) # json.dump(d, f)

View file

@ -1,15 +1,20 @@
import requests import requests
headers = { headers = {
'Host': 'api.rightmove.co.uk', "Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br', # 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0', "User-Agent": "okhttp/4.10.0",
'Connection': 'close', "Connection": "close",
} }
params = { params = {
'apiApplication': 'ANDROID', "apiApplication": "ANDROID",
'appVersion': '3.70.0', "appVersion": "3.70.0",
} }
response = requests.get('https://api.rightmove.co.uk/api/property/119578451', params=params, headers=headers, verify=False) response = requests.get(
"https://api.rightmove.co.uk/api/property/119578451",
params=params,
headers=headers,
verify=False,
)

View file

@ -3,11 +3,12 @@ from PIL import Image
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
import pytesseract import pytesseract
def inference(image_path): def inference(image_path):
image = Image.open(image_path) image = Image.open(image_path)
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
processor = Pix2StructProcessor.from_pretrained('google/deplot') processor = Pix2StructProcessor.from_pretrained("google/deplot")
model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot') model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
inputs = processor(images=image, text=question, return_tensors="pt") inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=512) predictions = model.generate(**inputs, max_new_tokens=512)
@ -17,7 +18,7 @@ def inference(image_path):
def extract_total_sqm(deplot_input_str): def extract_total_sqm(deplot_input_str):
sqmregex = r'(\d+\.\d*) ?(sq ?m|sq. ?m)' sqmregex = r"(\d+\.\d*) ?(sq ?m|sq. ?m)"
matches = re.findall(sqmregex, deplot_input_str.lower()) matches = re.findall(sqmregex, deplot_input_str.lower())
if len(matches) == 0: if len(matches) == 0:
return None return None

View file

@ -18,10 +18,10 @@ headers = {
def detail_query(detail_id: int): def detail_query(detail_id: int):
params = { params = {
'apiApplication': 'ANDROID', "apiApplication": "ANDROID",
'appVersion': '3.70.0', "appVersion": "3.70.0",
} }
url = f'https://api.rightmove.co.uk/api/property/{detail_id}' url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
response = requests.get(url, params=params, headers=headers, verify=False) response = requests.get(url, params=params, headers=headers, verify=False)
if response.status_code != 200: if response.status_code != 200:
raise Exception("Failed due to: ", response.text) raise Exception("Failed due to: ", response.text)
@ -30,7 +30,16 @@ def detail_query(detail_id: int):
# @cache.memoize() # @cache.memoize()
def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float, min_price: int, max_price: int, mustNewHome: bool = False, max_days_since_added: int = None) -> dict: def listing_query(
page: int,
min_bedrooms: int,
max_bedrooms: int,
radius: float,
min_price: int,
max_price: int,
mustNewHome: bool = False,
max_days_since_added: int = None,
) -> dict:
params = { params = {
"locationIdentifier": "POSTCODE^4228216", "locationIdentifier": "POSTCODE^4228216",
"channel": "BUY", "channel": "BUY",
@ -49,12 +58,12 @@ def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float
"appVersion": "3.70.0", "appVersion": "3.70.0",
} }
if max_days_since_added: if max_days_since_added:
if max_days_since_added not in [1,3,7,14]: if max_days_since_added not in [1, 3, 7, 14]:
raise Exception("Invalid max days. Can only be", [1,3,7,14]) raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
params['maxDaysSinceAdded'] = max_days_since_added params["maxDaysSinceAdded"] = max_days_since_added
if mustNewHome: if mustNewHome:
params['mustHave'] = 'newHome' params["mustHave"] = "newHome"
response = requests.get( response = requests.get(
"https://api.rightmove.co.uk/api/property-listing", "https://api.rightmove.co.uk/api/property-listing",
@ -69,7 +78,14 @@ def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float
if __name__ == "__main__": if __name__ == "__main__":
response = listing_query(page=1, min_bedrooms=2, max_bedrooms=2, radius=5.0, min_price=150000, max_price=700000) response = listing_query(
page=1,
min_bedrooms=2,
max_bedrooms=2,
radius=5.0,
min_price=150000,
max_price=700000,
)
resp = response resp = response
for d in resp["properties"]: for d in resp["properties"]:
rl = RightmoveListing( rl = RightmoveListing(

View file

@ -2,99 +2,100 @@ import requests
from rec.utils import nextMonday from rec.utils import nextMonday
from collections import defaultdict from collections import defaultdict
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8' API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
url = "https://routes.googleapis.com/directions/v2:computeRoutes" url = "https://routes.googleapis.com/directions/v2:computeRoutes"
def transit_route(origin_lat:float, origin_lon:float, dest_lat:float, dest_lon:float, compute_alternative_routes=True): def transit_route(
monday9am = nextMonday() origin_lat: float,
origin_lon: float,
dest_lat: float,
dest_lon: float,
compute_alternative_routes=True,
):
monday9am = nextMonday()
header = { header = {
"X-Goog-Api-Key": API_KEY, "X-Goog-Api-Key": API_KEY,
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode", "X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
} }
body = { body = {
"origin":{ "origin": {
"location":{ "location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
"latLng":{ },
"latitude": origin_lat, "destination": {
"longitude": origin_lon "location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
} },
} "travelMode": "TRANSIT",
}, # "2023-10-15T15:01:23.045123456Z"
"destination":{ "departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
"location":{ "computeAlternativeRoutes": compute_alternative_routes,
"latLng":{ # "routeModifiers": {
"latitude": dest_lat, # "avoidTolls": false,
"longitude": dest_lon # "avoidHighways": false,
} # "avoidFerries": false
} # },
}, "languageCode": "en-US",
"travelMode": "TRANSIT", "units": "METRIC",
# "2023-10-15T15:01:23.045123456Z" }
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
"computeAlternativeRoutes": compute_alternative_routes,
# "routeModifiers": {
# "avoidTolls": false,
# "avoidHighways": false,
# "avoidFerries": false
# },
"languageCode": "en-US",
"units": "METRIC"
}
r = requests.post(url, json=body, headers=header) r = requests.post(url, json=body, headers=header)
if r.status_code == 200: if r.status_code == 200:
return r.json() return r.json()
raise Exception(r.json()) raise Exception(r.json())
def extract_time(d, limit:int=2):
res = []
for route in d['routes']:
distance = route['distanceMeters']
duration = int(route['duration'].strip('s'))
duration_static = int(route['staticDuration'].strip('s'))
steps = route['legs'][0]['steps'] def extract_time(d, limit: int = 2):
initial_walk_duration = 0 res = []
used_transit = False for route in d["routes"]:
duration_per_transit = defaultdict(lambda: 0) distance = route["distanceMeters"]
distance_per_transit = defaultdict(lambda: 0) duration = int(route["duration"].strip("s"))
number_of_transit_stops = 0 duration_static = int(route["staticDuration"].strip("s"))
for step in steps: steps = route["legs"][0]["steps"]
if used_transit == False and step['travelMode'] == 'WALK': initial_walk_duration = 0
initial_walk_duration += int(step['staticDuration'].strip('s')) used_transit = False
else: duration_per_transit = defaultdict(lambda: 0)
used_transit = True distance_per_transit = defaultdict(lambda: 0)
duration_per_transit[step['travelMode']] += int(step['staticDuration'].strip('s')) number_of_transit_stops = 0
distance_per_transit[step['travelMode']] += step.get('distanceMeters', 0)
if step['travelMode'] == 'TRANSIT':
number_of_transit_stops += 1
res.append({ for step in steps:
'duration': duration, if used_transit == False and step["travelMode"] == "WALK":
'distance': distance, initial_walk_duration += int(step["staticDuration"].strip("s"))
'duration_static': duration_static, else:
'initial_walk_duration': initial_walk_duration, used_transit = True
'duration_per_transit': dict(duration_per_transit), duration_per_transit[step["travelMode"]] += int(
'distance_per_transit': dict(distance_per_transit), step["staticDuration"].strip("s")
'number_of_transit_stops': number_of_transit_stops, )
}) distance_per_transit[step["travelMode"]] += step.get("distanceMeters", 0)
if step["travelMode"] == "TRANSIT":
number_of_transit_stops += 1
return res[:limit] res.append(
{
"duration": duration,
"distance": distance,
"duration_static": duration_static,
"initial_walk_duration": initial_walk_duration,
"duration_per_transit": dict(duration_per_transit),
"distance_per_transit": dict(distance_per_transit),
"number_of_transit_stops": number_of_transit_stops,
}
)
return res[:limit]
if __name__ == "__main__": if __name__ == "__main__":
import json import json
with open('code/json/routing_routeapi.json', 'r') as f:
d = json.load(f)
extract_time(d) with open("code/json/routing_routeapi.json", "r") as f:
d = json.load(f)
extract_time(d)
# if __name__ == "__main__": # if __name__ == "__main__":
@ -104,4 +105,3 @@ if __name__ == "__main__":
# import json # import json
# with open('code/json/routing_routeapi.json', 'w') as f: # with open('code/json/routing_routeapi.json', 'w') as f:
# json.dump(d, f) # json.dump(d, f)

View file

@ -1,5 +1,6 @@
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
def nextMonday(): def nextMonday():
""" """
I think this function doesnt work when the day is monday itself. I think this function doesnt work when the day is monday itself.
@ -10,8 +11,11 @@ def nextMonday():
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
days_until_monday = (0 - now.weekday() + 7) % 7 days_until_monday = (0 - now.weekday() + 7) % 7
monday = now + timedelta(days=days_until_monday) monday = now + timedelta(days=days_until_monday)
monday_9am = monday.replace(hour=9, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) monday_9am = monday.replace(
hour=9, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
)
return monday_9am return monday_9am
if __name__ == '__main__':
if __name__ == "__main__":
print(nextMonday()) print(nextMonday())

View file

@ -1,10 +1,9 @@
from rec.db import RightmoveListing, session from rec.db import RightmoveListing, session
from sqlalchemy import select from sqlalchemy import select
if __name__ == '__main__': if __name__ == "__main__":
print("x") print("x")
x = select(RightmoveListing).where(RightmoveListing.price <600000) x = select(RightmoveListing).where(RightmoveListing.price < 600000)
print("y") print("y")
d = list(session.execute(x)) d = list(session.execute(x))
print(d) print(d)