ruff format

This commit is contained in:
Kadir 2024-03-25 20:48:48 +00:00
parent 37e3e8ad6f
commit d777558b34
17 changed files with 411 additions and 368 deletions

View file

@ -12,10 +12,10 @@ for i in range(1, 10000):
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
except:
break
for property in d['properties']:
identifier = property['identifier']
for property in d["properties"]:
identifier = property["identifier"]
listing = Listing(identifier)
with open(listing.path_listing_json(), 'w') as f:
with open(listing.path_listing_json(), "w") as f:
json.dump(property, f)

View file

@ -7,11 +7,11 @@ from data_access import Listing
for listing in tqdm(Listing.get_all_listings()):
if listing.path_detail_json().exists():
continue
try:
d = detail_query(listing.identifier)
with open(listing.path_detail_json(), 'w') as f:
with open(listing.path_detail_json(), "w") as f:
json.dump(d, f)
except:
print('Failed at: ', listing.identifier)
print("Failed at: ", listing.identifier)
raise

View file

@ -6,26 +6,23 @@ from data_access import Listing
for listing in tqdm(Listing.get_all_listings()):
with open(listing.path_detail_json()) as f:
detail = json.load(f)
for photo in detail['property']['photos']:
url = photo['maxSizeUrl']
picname = url.split('/')[-1]
order = photo['order']
for photo in detail["property"]["photos"]:
url = photo["maxSizeUrl"]
picname = url.split("/")[-1]
order = photo["order"]
p = listing.path_pic_file(order, picname)
if p.exists():
continue
tqdm.write(str(p))
urlretrieve(url, p)
for photo in detail['property']['floorplans']:
url = photo['url']
picname = url.split('/')[-1]
order = photo['order']
for photo in detail["property"]["floorplans"]:
url = photo["url"]
picname = url.split("/")[-1]
order = photo["order"]
p = listing.path_floorplan_file(order, picname)
if p.exists():
continue
tqdm.write(str(p))
urlretrieve(url, p)

View file

@ -8,6 +8,6 @@ for listing in tqdm(listings):
lat, long = BROCK_STREET_LAT_LONG
listing.calculate_route(lat, long, recalculate=False)
traveltime = listing.travel_time[0]
duration_minutes = traveltime['duration'] / 60.
duration_minutes = traveltime["duration"] / 60.0
tqdm.write(f"{listing.identifier} {duration_minutes}")

View file

@ -9,7 +9,7 @@ for listing in tqdm(list(Listing.get_all_listings())):
floorplans = json.load(f)
for floorplan in floorplans:
floorplan['estimated_sqm'] = extract_total_sqm(floorplan['text'])
floorplan["estimated_sqm"] = extract_total_sqm(floorplan["text"])
with open(listing.path_floorplan_ocr_json(), 'w') as f:
with open(listing.path_floorplan_ocr_json(), "w") as f:
floorplans = json.dump(floorplans, f)

View file

@ -5,202 +5,213 @@ from typing import List, Dict
from rec import floorplan, routing
import re
_DATA_DIR = pathlib.Path('data/rs/')
_DATA_DIR = pathlib.Path("data/rs/")
@dataclass()
class Listing:
    """A scraped Rightmove listing, addressed by its numeric identifier.

    Every artefact belonging to a listing (search-result JSON, detail JSON,
    pictures, floorplans, OCR/model output, routing result) is stored in the
    folder ``_DATA_DIR / <identifier>``. The ``path_*`` helpers return
    locations inside that folder.
    """

    identifier: int
    # Parsed contents of detail.json, filled on first use by ``detailobject``.
    _cached: Dict = None

    @staticmethod
    def get_all_listings() -> List["Listing"]:
        """Return one ``Listing`` per ``*/listing.json`` found under ``_DATA_DIR``.

        The identifier is read from the JSON file itself, not from the folder
        name. Results follow the sorted order of the file paths.
        """
        listings = []
        for listing_path in sorted(_DATA_DIR.glob("*/listing.json")):
            with open(listing_path) as f:
                d = json.load(f)
            listings.append(Listing(d["identifier"]))
        return listings

    def path_listing(self) -> pathlib.Path:
        """Return the listing's folder, creating it if it does not exist yet."""
        p = _DATA_DIR / str(self.identifier)
        p.mkdir(parents=True, exist_ok=True)
        return p

    def path_listing_json(self) -> pathlib.Path:
        """Path of the search-result entry stored for this listing."""
        return self.path_listing() / "listing.json"

    def path_detail_json(self) -> pathlib.Path:
        """Path of the stored detail-query response."""
        return self.path_listing() / "detail.json"

    def path_routing_json(self) -> pathlib.Path:
        """Path of the stored routing result."""
        return self.path_listing() / "routing.json"

    def path_floorplan_model_json(self) -> pathlib.Path:
        """Path of the stored model-based floorplan estimates."""
        return self.path_listing() / "floorplan_model.json"

    def path_floorplan_ocr_json(self) -> pathlib.Path:
        """Path of the stored OCR-based floorplan estimates."""
        return self.path_listing() / "floorplan_ocr.json"

    def path_pic_folder(self) -> pathlib.Path:
        """Folder holding the listing's photos (not created here)."""
        return self.path_listing() / "pics"

    def path_pic_file(self, order, name) -> pathlib.Path:
        """Return ``pics/<order>_<name>``, creating the folder if needed."""
        folder = self.path_pic_folder()
        folder.mkdir(parents=True, exist_ok=True)
        return folder / f"{order}_{name}"

    def path_floorplan_folder(self) -> pathlib.Path:
        """Folder holding the listing's floorplan images (not created here)."""
        return self.path_listing() / "floorplans"

    def path_floorplan_file(self, order, name) -> pathlib.Path:
        """Return ``floorplans/<order>_<name>``, creating the folder if needed."""
        folder = self.path_floorplan_folder()
        folder.mkdir(parents=True, exist_ok=True)
        return folder / f"{order}_{name}"

    def list_floorplans(self):
        """Return every path found directly inside the floorplan folder."""
        images = list(self.path_floorplan_folder().glob("*"))
        # todo add check if return is image
        return images

    @staticmethod
    def _max_estimated_sqm(objs):
        """Return the largest non-``None`` ``estimated_sqm`` in *objs*, or ``None``."""
        sqms = [o["estimated_sqm"] for o in objs if o["estimated_sqm"] is not None]
        return max(sqms, default=None)

    def calculate_sqm_model(self):
        """Run ``floorplan.calculate_model`` on every floorplan and store the results.

        Writes one entry per floorplan to ``floorplan_model.json``.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output, predictions = floorplan.calculate_model(
                floorplan_path
            )
            objs.append(
                {
                    "floorplan_path": str(floorplan_path),
                    "estimated_sqm": estimated_sqm,
                    "model_output": model_output,
                    # Only the count is stored: the predictions themselves
                    # are a tensor and cannot be serialized to JSON.
                    "no_predictions": len(predictions),
                }
            )
        with open(self.path_floorplan_model_json(), "w") as f:
            json.dump(objs, f)

    @property
    def sqm_model(self, recalculate=True):
        """Largest model-estimated area over all floorplans, or ``None``.

        Previously this read from a non-existent ``path_floorplan_json()`` and
        filtered with ``o is None``, so it could never return a value.
        """
        # NOTE(review): a property is always invoked without arguments, so
        # ``recalculate`` keeps its default and the model runs on every access.
        if not self.path_floorplan_model_json().exists() or recalculate:
            self.calculate_sqm_model()
        with open(self.path_floorplan_model_json()) as f:
            objs = json.load(f)
        return self._max_estimated_sqm(objs)

    def calculate_sqm_ocr(self, recalculate=True):
        """Run ``floorplan.calculate_ocr`` on every floorplan and store the results.

        Does nothing when ``recalculate`` is false and ``floorplan_ocr.json``
        already exists.
        """
        if not recalculate and self.path_floorplan_ocr_json().exists():
            return
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
            objs.append(
                {
                    "floorplan_path": str(floorplan_path),
                    "estimated_sqm": estimated_sqm,
                    "text": model_output,
                }
            )
        with open(self.path_floorplan_ocr_json(), "w") as f:
            json.dump(objs, f)

    @property
    def sqm_ocr(self, recalculate=False):
        """Largest OCR-estimated area over all floorplans, or ``None``.

        The OCR results are computed on first access if they are not on disk.
        """
        if not self.path_floorplan_ocr_json().exists() or recalculate:
            self.calculate_sqm_ocr()
        with open(self.path_floorplan_ocr_json()) as f:
            objs = json.load(f)
        return self._max_estimated_sqm(objs)

    def calculate_route(self, dest_lat: float, dest_lon: float, recalculate=False):
        """Query ``routing.transit_route`` to the destination and store the result.

        Skipped when ``routing.json`` already exists, unless ``recalculate``.
        """
        if self.path_routing_json().exists() and not recalculate:
            return
        result = routing.transit_route(
            self.latitude, self.longitude, dest_lat, dest_lon
        )
        with open(self.path_routing_json(), "w") as f:
            json.dump(result, f)

    @property
    def travel_time(self) -> List:
        """Route summaries from ``routing.extract_time``; ``[]`` if no routing file."""
        if not self.path_routing_json().exists():
            return []
        with open(self.path_routing_json()) as f:
            d = json.load(f)
        return routing.extract_time(d)

    @property
    def url(self):
        """Public Rightmove URL of the listing."""
        return f"https://www.rightmove.co.uk/properties/{self.identifier}"

    @property
    def detailobject(self):
        """Parsed ``detail.json``, read from disk once and then cached."""
        if self._cached is None:
            with open(self.path_detail_json()) as f:
                self._cached = json.load(f)
        return self._cached

    @property
    def price(self) -> float:
        """``property.price`` from the detail object."""
        return self.detailobject["property"]["price"]

    @property
    def price_per_sqm(self) -> float:
        """Price divided by the OCR area, or ``None`` if the area is unknown or 0."""
        sqm = self.sqm_ocr  # read once: every access re-reads the OCR file
        if sqm is None or sqm == 0:
            return None
        return self.price / sqm

    @property
    def bedrooms(self) -> int:
        """``property.bedrooms`` from the detail object."""
        return self.detailobject["property"]["bedrooms"]

    @property
    def latitude(self) -> float:
        """``property.latitude`` from the detail object."""
        return self.detailobject["property"]["latitude"]

    @property
    def longitude(self) -> float:
        """``property.longitude`` from the detail object."""
        return self.detailobject["property"]["longitude"]

    @property
    def leaseLeft(self) -> float:
        """First number found in the ``lengthOfLease`` tenure entry, else ``None``.

        Looks through ``property.tenureInfo.content`` and returns the first
        numeric match of the first ``lengthOfLease`` entry that has one.
        """
        ds = self.detailobject["property"].get("tenureInfo", {}).get("content", [])
        for d in ds:
            if d["type"] == "lengthOfLease":
                matches = re.findall(r"(\d+\.?\d*)", d["value"])
                if matches:
                    return float(matches[0])
        return None

    @property
    def development(self) -> bool:
        """``property.development`` from the detail object (aka new home)."""
        return self.detailobject["property"]["development"]

    def dict_nicely(self):
        """Return a flat summary dict of the listing.

        The two travel-time entries are the first and second element of
        ``travel_time``; each is ``None`` when that element does not exist.
        """
        travel_times = self.travel_time  # read routing.json only once
        return {
            "identifier": self.identifier,
            "sqm_ocr": self.sqm_ocr,
            "price": self.price,
            "price_per_sqm": self.price_per_sqm,
            "url": self.url,
            "bedrooms": self.bedrooms,
            "travel_time_fastest": travel_times[0] if travel_times else None,
            "travel_time_second": travel_times[1] if len(travel_times) > 1 else None,
            "lease_left": self.leaseLeft,
            "development": self.development,
        }
if __name__ == '__main__':
if __name__ == "__main__":
listings = Listing.get_all_listings()
print(listings[0].list_floorplans())

View file

@ -1,32 +1,40 @@
def record():
from rec.query import listing_query, detail_query
import json
page = 1
listing = listing_query(page, 2, 2, 5, 200000, 500000)
with open(f'/Users/kadir/code/realestate/crawler/code/json/queries/listing{page}.json', 'w') as f:
with open(
f"/Users/kadir/code/realestate/crawler/code/json/queries/listing{page}.json",
"w",
) as f:
json.dump(listing, f)
for prop in listing['properties']:
identifier = prop['identifier']
for prop in listing["properties"]:
identifier = prop["identifier"]
resp = detail_query(identifier)
# print(identifier, resp.status_code)
with open(f'/Users/kadir/code/realestate/crawler/code/json/queries/detail_{identifier}.json', 'w') as f:
with open(
f"/Users/kadir/code/realestate/crawler/code/json/queries/detail_{identifier}.json",
"w",
) as f:
json.dump(resp, f)
def process():
import json
import pathlib
path = pathlib.Path('/Users/kadir/code/realestate/crawler/code/json/queries/')
detailjsons = list(path.glob('detail_*json'))
path = pathlib.Path("/Users/kadir/code/realestate/crawler/code/json/queries/")
detailjsons = list(path.glob("detail_*json"))
for file in detailjsons:
with open(file) as f:
js = json.load(f)
for floorplan in js['property']['floorplans']:
print(floorplan['url'])
for floorplan in js["property"]["floorplans"]:
print(floorplan["url"])
# record()
process()
process()

View file

@ -1,13 +1,13 @@
import requests
headers = {
'Host': 'media.rightmove.co.uk',
"Host": "media.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0',
"User-Agent": "okhttp/4.10.0",
}
response = requests.get(
'https://media.rightmove.co.uk/47k/46001/138680705/46001_32532509_IMG_00_0000.jpeg',
"https://media.rightmove.co.uk/47k/46001/138680705/46001_32532509_IMG_00_0000.jpeg",
headers=headers,
verify=False,
)
)

View file

@ -1,58 +1,68 @@
import requests
headers = {
'Host': 'api.rightmove.co.uk',
"Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0',
'Connection': 'close',
"User-Agent": "okhttp/4.10.0",
"Connection": "close",
}
params = {
'locationIdentifier': 'POSTCODE^4228216',
'channel': 'BUY',
'page': '1',
'numberOfPropertiesPerPage': '25',
'radius': '3.0',
'sortBy': 'distance',
'includeUnavailableProperties': 'false',
'propertyTypes': 'flat',
'mustHave': 'newHome', # added manually later
'dontShow': 'sharedOwnership,retirement',
'minPrice': '150000',
'maxPrice': '500000',
'minBedrooms': '2',
'maxBedrooms': '2',
'apiApplication': 'ANDROID',
'appVersion': '3.70.0',
"locationIdentifier": "POSTCODE^4228216",
"channel": "BUY",
"page": "1",
"numberOfPropertiesPerPage": "25",
"radius": "3.0",
"sortBy": "distance",
"includeUnavailableProperties": "false",
"propertyTypes": "flat",
"mustHave": "newHome", # added manually later
"dontShow": "sharedOwnership,retirement",
"minPrice": "150000",
"maxPrice": "500000",
"minBedrooms": "2",
"maxBedrooms": "2",
"apiApplication": "ANDROID",
"appVersion": "3.70.0",
}
response = requests.get('https://api.rightmove.co.uk/api/property-listing', params=params, headers=headers, verify=False)
response = requests.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
headers=headers,
verify=False,
)
import requests
headers = {
'Host': 'api.rightmove.co.uk',
"Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0',
'Connection': 'close',
"User-Agent": "okhttp/4.10.0",
"Connection": "close",
}
params = {
'locationIdentifier': 'POSTCODE^4228216',
'channel': 'BUY',
'page': '2',
'numberOfPropertiesPerPage': '25',
'radius': '3.0',
'sortBy': 'distance',
'includeUnavailableProperties': 'false',
'propertyTypes': 'flat',
'dontShow': 'sharedOwnership,retirement',
'minPrice': '150000',
'maxPrice': '600000',
'minBedrooms': '2',
'maxBedrooms': '2',
'apiApplication': 'ANDROID',
'appVersion': '3.70.0',
"locationIdentifier": "POSTCODE^4228216",
"channel": "BUY",
"page": "2",
"numberOfPropertiesPerPage": "25",
"radius": "3.0",
"sortBy": "distance",
"includeUnavailableProperties": "false",
"propertyTypes": "flat",
"dontShow": "sharedOwnership,retirement",
"minPrice": "150000",
"maxPrice": "600000",
"minBedrooms": "2",
"maxBedrooms": "2",
"apiApplication": "ANDROID",
"appVersion": "3.70.0",
}
response = requests.get('https://api.rightmove.co.uk/api/property-listing', params=params, headers=headers, verify=False)
response = requests.get(
"https://api.rightmove.co.uk/api/property-listing",
params=params,
headers=headers,
verify=False,
)

View file

@ -1,16 +1,16 @@
import requests
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8'
API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
url = "https://maps.googleapis.com/maps/api/distancematrix/json"
origin = '51.5636306598907,-0.11061106079085892'
origin = "51.5636306598907,-0.11061106079085892"
dest = "51.53836609846008,-0.12743940233824352"
params = {
"origins": origin,
"destinations": dest,
"key": API_KEY,
"departure_time": "", # timstamp, optional
"mode": "transit",
"origins": origin,
"destinations": dest,
"key": API_KEY,
"departure_time": "", # timstamp, optional
"mode": "transit",
}
r = requests.get(url, params=params)
@ -18,6 +18,5 @@ print(r.status_code)
print(r.json())
with open('code/json/routing_distancematrix.json', 'w') as f:
f.write(r.text)
with open("code/json/routing_distancematrix.json", "w") as f:
f.write(r.text)

View file

@ -2,83 +2,77 @@ import requests
from utils import nextMonday
from collections import defaultdict
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8'
API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
url = "https://routes.googleapis.com/directions/v2:computeRoutes"
def travel_time(origin_lat:float, origin_lon:float, dest_lat:float, dest_lon:float):
monday9am = nextMonday()
def travel_time(origin_lat: float, origin_lon: float, dest_lat: float, dest_lon: float):
monday9am = nextMonday()
header = {
"X-Goog-Api-Key": API_KEY,
"Content-Type": "application/json",
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
}
header = {
"X-Goog-Api-Key": API_KEY,
"Content-Type": "application/json",
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
}
body = {
"origin":{
"location":{
"latLng":{
"latitude": origin_lat,
"longitude": origin_lon
}
}
},
"destination":{
"location":{
"latLng":{
"latitude": dest_lat,
"longitude": dest_lon
}
}
},
"travelMode": "TRANSIT",
# "2023-10-15T15:01:23.045123456Z"
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
"computeAlternativeRoutes": False,
# "routeModifiers": {
# "avoidTolls": false,
# "avoidHighways": false,
# "avoidFerries": false
# },
"languageCode": "en-US",
"units": "METRIC"
}
body = {
"origin": {
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
},
"destination": {
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
},
"travelMode": "TRANSIT",
# "2023-10-15T15:01:23.045123456Z"
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
"computeAlternativeRoutes": False,
# "routeModifiers": {
# "avoidTolls": false,
# "avoidHighways": false,
# "avoidFerries": false
# },
"languageCode": "en-US",
"units": "METRIC",
}
r = requests.post(url, json=body, headers=header)
if r.status_code == 200:
return r.json()
raise Exception(r.json())
r = requests.post(url, json=body, headers=header)
if r.status_code == 200:
return r.json()
raise Exception(r.json())
def extract_time(d):
r = d['routes'][0]
print(r.keys())
distance = r['distanceMeters']
duration = r['duration']
duration_static = r['staticDuration']
steps = r['legs'][0]['steps']
# print(steps)
duration_per_transit = defaultdict(lambda: 0)
distance_per_transit = defaultdict(lambda: 0)
for step in steps:
duration_per_transit[step['travelMode']] += int(step['staticDuration'].strip('s'))
distance_per_transit[step['travelMode']] += step.get('distanceMeters', 0)
print(f"dis {distance}, dur {duration}, duration per transit {dict(duration_per_transit)}, distance per transit {dict(distance_per_transit)}")
r = d["routes"][0]
print(r.keys())
distance = r["distanceMeters"]
duration = r["duration"]
duration_static = r["staticDuration"]
steps = r["legs"][0]["steps"]
# print(steps)
duration_per_transit = defaultdict(lambda: 0)
distance_per_transit = defaultdict(lambda: 0)
for step in steps:
duration_per_transit[step["travelMode"]] += int(
step["staticDuration"].strip("s")
)
distance_per_transit[step["travelMode"]] += step.get("distanceMeters", 0)
print(
f"dis {distance}, dur {duration}, duration per transit {dict(duration_per_transit)}, distance per transit {dict(distance_per_transit)}"
)
if __name__ == "__main__":
import json
with open('code/json/routing_routeapi.json', 'r') as f:
d = json.load(f)
extract_time(d)
import json
with open("code/json/routing_routeapi.json", "r") as f:
d = json.load(f)
extract_time(d)
# if __name__ == "__main__":
# origin = 51.5635664310333, -0.1107173751570373 # home
@ -87,4 +81,3 @@ if __name__ == "__main__":
# import json
# with open('code/json/routing_routeapi.json', 'w') as f:
# json.dump(d, f)

View file

@ -1,15 +1,20 @@
import requests
headers = {
'Host': 'api.rightmove.co.uk',
"Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'okhttp/4.10.0',
'Connection': 'close',
"User-Agent": "okhttp/4.10.0",
"Connection": "close",
}
params = {
'apiApplication': 'ANDROID',
'appVersion': '3.70.0',
"apiApplication": "ANDROID",
"appVersion": "3.70.0",
}
response = requests.get('https://api.rightmove.co.uk/api/property/119578451', params=params, headers=headers, verify=False)
response = requests.get(
"https://api.rightmove.co.uk/api/property/119578451",
params=params,
headers=headers,
verify=False,
)

View file

@ -3,21 +3,22 @@ from PIL import Image
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
import pytesseract
def inference(image_path):
image = Image.open(image_path)
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
processor = Pix2StructProcessor.from_pretrained('google/deplot')
model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot')
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
processor = Pix2StructProcessor.from_pretrained("google/deplot")
model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=512)
output = processor.decode(predictions[0], skip_special_tokens=True)
return output, predictions
def extract_total_sqm(deplot_input_str):
sqmregex = r'(\d+\.\d*) ?(sq ?m|sq. ?m)'
sqmregex = r"(\d+\.\d*) ?(sq ?m|sq. ?m)"
matches = re.findall(sqmregex, deplot_input_str.lower())
if len(matches) == 0:
return None

View file

@ -18,10 +18,10 @@ headers = {
def detail_query(detail_id: int):
params = {
'apiApplication': 'ANDROID',
'appVersion': '3.70.0',
"apiApplication": "ANDROID",
"appVersion": "3.70.0",
}
url = f'https://api.rightmove.co.uk/api/property/{detail_id}'
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
response = requests.get(url, params=params, headers=headers, verify=False)
if response.status_code != 200:
raise Exception("Failed due to: ", response.text)
@ -30,7 +30,16 @@ def detail_query(detail_id: int):
# @cache.memoize()
def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float, min_price: int, max_price: int, mustNewHome: bool = False, max_days_since_added: int = None) -> dict:
def listing_query(
page: int,
min_bedrooms: int,
max_bedrooms: int,
radius: float,
min_price: int,
max_price: int,
mustNewHome: bool = False,
max_days_since_added: int = None,
) -> dict:
params = {
"locationIdentifier": "POSTCODE^4228216",
"channel": "BUY",
@ -49,12 +58,12 @@ def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float
"appVersion": "3.70.0",
}
if max_days_since_added:
if max_days_since_added not in [1,3,7,14]:
raise Exception("Invalid max days. Can only be", [1,3,7,14])
params['maxDaysSinceAdded'] = max_days_since_added
if max_days_since_added not in [1, 3, 7, 14]:
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
params["maxDaysSinceAdded"] = max_days_since_added
if mustNewHome:
params['mustHave'] = 'newHome'
params["mustHave"] = "newHome"
response = requests.get(
"https://api.rightmove.co.uk/api/property-listing",
@ -69,7 +78,14 @@ def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float
if __name__ == "__main__":
response = listing_query(page=1, min_bedrooms=2, max_bedrooms=2, radius=5.0, min_price=150000, max_price=700000)
response = listing_query(
page=1,
min_bedrooms=2,
max_bedrooms=2,
radius=5.0,
min_price=150000,
max_price=700000,
)
resp = response
for d in resp["properties"]:
rl = RightmoveListing(

View file

@ -2,100 +2,101 @@ import requests
from rec.utils import nextMonday
from collections import defaultdict
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8'
API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
url = "https://routes.googleapis.com/directions/v2:computeRoutes"
def transit_route(
    origin_lat: float,
    origin_lon: float,
    dest_lat: float,
    dest_lon: float,
    compute_alternative_routes=True,
):
    """Request a transit route between two coordinates from the Routes API.

    The departure time is the value returned by ``nextMonday()``.

    Args:
        origin_lat, origin_lon: Coordinates of the starting point.
        dest_lat, dest_lon: Coordinates of the destination.
        compute_alternative_routes: Sent as ``computeAlternativeRoutes``.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        Exception: For any non-200 response; carries the decoded JSON body,
            or the raw text when the body is not valid JSON.
    """
    monday9am = nextMonday()

    header = {
        "X-Goog-Api-Key": API_KEY,
        "Content-Type": "application/json",
        "X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
    }

    body = {
        "origin": {
            "location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
        },
        "destination": {
            "location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
        },
        "travelMode": "TRANSIT",
        # Format example: "2023-10-15T15:01:23.045123456Z"
        "departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
        "computeAlternativeRoutes": compute_alternative_routes,
        "languageCode": "en-US",
        "units": "METRIC",
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.post(url, json=body, headers=header, timeout=30)
    if r.status_code == 200:
        return r.json()

    # An error body is not guaranteed to be JSON; fall back to the raw text so
    # the original failure is reported instead of a decode error.
    try:
        detail = r.json()
    except ValueError:
        detail = r.text
    raise Exception(detail)
def _parse_seconds(value) -> int:
    """Convert a duration string such as ``"1234s"`` or ``"3.5s"`` to whole seconds."""
    return round(float(value.rstrip("s")))


def extract_time(d, limit: int = 2):
    """Summarise the first ``limit`` routes of a computeRoutes response.

    Args:
        d: Decoded response dict. A dict without a ``routes`` key yields ``[]``
            (presumably what the API sends when no route is found — confirm).
        limit: Maximum number of routes to summarise, in response order.

    Returns:
        A list with one dict per route containing ``duration``, ``distance``,
        ``duration_static``, ``initial_walk_duration``,
        ``duration_per_transit``, ``distance_per_transit`` and
        ``number_of_transit_stops``. Durations are whole seconds.
    """
    res = []
    for route in d.get("routes", [])[:limit]:
        # Only the first leg is inspected.
        steps = route["legs"][0]["steps"]

        initial_walk_duration = 0
        used_transit = False
        duration_per_transit = defaultdict(int)
        distance_per_transit = defaultdict(int)
        number_of_transit_stops = 0

        for step in steps:
            mode = step["travelMode"]
            step_seconds = _parse_seconds(step["staticDuration"])
            if not used_transit and mode == "WALK":
                # Walking before the first non-walk step is tracked separately
                # and is not added to the per-mode totals.
                initial_walk_duration += step_seconds
                continue
            used_transit = True
            duration_per_transit[mode] += step_seconds
            distance_per_transit[mode] += step.get("distanceMeters", 0)
            if mode == "TRANSIT":
                number_of_transit_stops += 1

        res.append(
            {
                "duration": _parse_seconds(route["duration"]),
                "distance": route["distanceMeters"],
                "duration_static": _parse_seconds(route["staticDuration"]),
                "initial_walk_duration": initial_walk_duration,
                "duration_per_transit": dict(duration_per_transit),
                "distance_per_transit": dict(distance_per_transit),
                "number_of_transit_stops": number_of_transit_stops,
            }
        )
    return res
if __name__ == "__main__":
import json
with open('code/json/routing_routeapi.json', 'r') as f:
d = json.load(f)
extract_time(d)
import json
with open("code/json/routing_routeapi.json", "r") as f:
d = json.load(f)
extract_time(d)
# if __name__ == "__main__":
# origin = 51.5635664310333, -0.1107173751570373 # home
@ -104,4 +105,3 @@ if __name__ == "__main__":
# import json
# with open('code/json/routing_routeapi.json', 'w') as f:
# json.dump(d, f)

View file

@ -1,5 +1,6 @@
from datetime import datetime, timedelta, timezone
def nextMonday(now=None):
    """Return the next Monday 09:00 UTC that lies strictly after *now*.

    On a Monday before 09:00 UTC this is the same day. On a Monday at or
    after 09:00 UTC it is the Monday of the following week; previously that
    case returned a time in the past.

    Args:
        now: Timezone-aware reference time. Defaults to the current UTC time.

    Returns:
        A timezone-aware ``datetime`` in UTC.
    """
    if now is None:
        now = datetime.now(timezone.utc)
    else:
        now = now.astimezone(timezone.utc)

    days_until_monday = (7 - now.weekday()) % 7
    monday = now + timedelta(days=days_until_monday)
    monday_9am = monday.replace(
        hour=9, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
    )
    # Today is Monday and 09:00 has already passed: move to next week.
    if monday_9am <= now:
        monday_9am += timedelta(days=7)
    return monday_9am
if __name__ == '__main__':
if __name__ == "__main__":
print(nextMonday())

View file

@ -1,10 +1,9 @@
from rec.db import RightmoveListing, session
from sqlalchemy import select
if __name__ == '__main__':
if __name__ == "__main__":
print("x")
x = select(RightmoveListing).where(RightmoveListing.price <600000)
x = select(RightmoveListing).where(RightmoveListing.price < 600000)
print("y")
d = list(session.execute(x))
print(d)