ruff format
This commit is contained in:
parent
37e3e8ad6f
commit
d777558b34
17 changed files with 411 additions and 368 deletions
|
|
@ -12,10 +12,10 @@ for i in range(1, 10000):
|
|||
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
|
||||
except:
|
||||
break
|
||||
|
||||
for property in d['properties']:
|
||||
identifier = property['identifier']
|
||||
|
||||
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
|
||||
listing = Listing(identifier)
|
||||
with open(listing.path_listing_json(), 'w') as f:
|
||||
with open(listing.path_listing_json(), "w") as f:
|
||||
json.dump(property, f)
|
||||
|
|
|
|||
|
|
@ -7,11 +7,11 @@ from data_access import Listing
|
|||
for listing in tqdm(Listing.get_all_listings()):
|
||||
if listing.path_detail_json().exists():
|
||||
continue
|
||||
|
||||
|
||||
try:
|
||||
d = detail_query(listing.identifier)
|
||||
with open(listing.path_detail_json(), 'w') as f:
|
||||
with open(listing.path_detail_json(), "w") as f:
|
||||
json.dump(d, f)
|
||||
except:
|
||||
print('Failed at: ', listing.identifier)
|
||||
print("Failed at: ", listing.identifier)
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -6,26 +6,23 @@ from data_access import Listing
|
|||
for listing in tqdm(Listing.get_all_listings()):
|
||||
with open(listing.path_detail_json()) as f:
|
||||
detail = json.load(f)
|
||||
|
||||
|
||||
for photo in detail['property']['photos']:
|
||||
url = photo['maxSizeUrl']
|
||||
picname = url.split('/')[-1]
|
||||
order = photo['order']
|
||||
|
||||
for photo in detail["property"]["photos"]:
|
||||
url = photo["maxSizeUrl"]
|
||||
picname = url.split("/")[-1]
|
||||
order = photo["order"]
|
||||
p = listing.path_pic_file(order, picname)
|
||||
if p.exists():
|
||||
continue
|
||||
tqdm.write(str(p))
|
||||
urlretrieve(url, p)
|
||||
|
||||
for photo in detail['property']['floorplans']:
|
||||
url = photo['url']
|
||||
picname = url.split('/')[-1]
|
||||
order = photo['order']
|
||||
|
||||
for photo in detail["property"]["floorplans"]:
|
||||
url = photo["url"]
|
||||
picname = url.split("/")[-1]
|
||||
order = photo["order"]
|
||||
p = listing.path_floorplan_file(order, picname)
|
||||
if p.exists():
|
||||
continue
|
||||
tqdm.write(str(p))
|
||||
urlretrieve(url, p)
|
||||
|
||||
|
||||
|
|
@ -8,6 +8,6 @@ for listing in tqdm(listings):
|
|||
lat, long = BROCK_STREET_LAT_LONG
|
||||
listing.calculate_route(lat, long, recalculate=False)
|
||||
traveltime = listing.travel_time[0]
|
||||
duration_minutes = traveltime['duration'] / 60.
|
||||
|
||||
duration_minutes = traveltime["duration"] / 60.0
|
||||
|
||||
tqdm.write(f"{listing.identifier} {duration_minutes}")
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ for listing in tqdm(list(Listing.get_all_listings())):
|
|||
floorplans = json.load(f)
|
||||
|
||||
for floorplan in floorplans:
|
||||
floorplan['estimated_sqm'] = extract_total_sqm(floorplan['text'])
|
||||
floorplan["estimated_sqm"] = extract_total_sqm(floorplan["text"])
|
||||
|
||||
with open(listing.path_floorplan_ocr_json(), 'w') as f:
|
||||
with open(listing.path_floorplan_ocr_json(), "w") as f:
|
||||
floorplans = json.dump(floorplans, f)
|
||||
|
|
|
|||
|
|
@ -5,202 +5,213 @@ from typing import List, Dict
|
|||
from rec import floorplan, routing
|
||||
import re
|
||||
|
||||
_DATA_DIR = pathlib.Path('data/rs/')
|
||||
_DATA_DIR = pathlib.Path("data/rs/")
|
||||
|
||||
|
||||
@dataclass()
|
||||
class Listing():
|
||||
class Listing:
|
||||
identifier: int
|
||||
_cached: Dict = None
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get_all_listings() -> List['Listing']:
|
||||
listing_paths = sorted(list(_DATA_DIR.glob('*/listing.json')))
|
||||
def get_all_listings() -> List["Listing"]:
|
||||
listing_paths = sorted(list(_DATA_DIR.glob("*/listing.json")))
|
||||
identifiers = []
|
||||
for listing_path in listing_paths:
|
||||
with open(listing_path) as f:
|
||||
d = json.load(f)
|
||||
identifiers.append(Listing(d['identifier']))
|
||||
|
||||
identifiers.append(Listing(d["identifier"]))
|
||||
|
||||
return identifiers
|
||||
|
||||
def path_listing(self) -> pathlib.Path:
|
||||
p = _DATA_DIR / str(self.identifier)
|
||||
p.mkdir(parents=True, exist_ok=True)
|
||||
return p
|
||||
|
||||
|
||||
def path_listing_json(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'listing.json'
|
||||
|
||||
return self.path_listing() / "listing.json"
|
||||
|
||||
def path_detail_json(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'detail.json'
|
||||
|
||||
return self.path_listing() / "detail.json"
|
||||
|
||||
def path_routing_json(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'routing.json'
|
||||
|
||||
return self.path_listing() / "routing.json"
|
||||
|
||||
def path_floorplan_model_json(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'floorplan_model.json'
|
||||
|
||||
return self.path_listing() / "floorplan_model.json"
|
||||
|
||||
def path_floorplan_ocr_json(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'floorplan_ocr.json'
|
||||
|
||||
return self.path_listing() / "floorplan_ocr.json"
|
||||
|
||||
def path_pic_folder(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'pics'
|
||||
|
||||
return self.path_listing() / "pics"
|
||||
|
||||
def path_pic_file(self, order, name) -> pathlib.Path:
|
||||
self.path_pic_folder().mkdir(parents=True, exist_ok=True)
|
||||
return self.path_pic_folder() / f'{order}_{name}'
|
||||
|
||||
return self.path_pic_folder() / f"{order}_{name}"
|
||||
|
||||
def path_floorplan_folder(self) -> pathlib.Path:
|
||||
return self.path_listing() / 'floorplans'
|
||||
|
||||
return self.path_listing() / "floorplans"
|
||||
|
||||
def path_floorplan_file(self, order, name) -> pathlib.Path:
|
||||
self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
|
||||
return self.path_floorplan_folder() / f'{order}_{name}'
|
||||
|
||||
return self.path_floorplan_folder() / f"{order}_{name}"
|
||||
|
||||
def list_floorplans(self):
|
||||
images = list(self.path_floorplan_folder().glob('*'))
|
||||
images = list(self.path_floorplan_folder().glob("*"))
|
||||
# todo add check if return is image
|
||||
return images
|
||||
|
||||
|
||||
def calculate_sqm_model(self):
|
||||
objs = []
|
||||
for floorplan_path in self.list_floorplans():
|
||||
estimated_sqm, model_output, predictions = floorplan.calculate_model(floorplan_path)
|
||||
objs.append({
|
||||
'floorplan_path': str(floorplan_path),
|
||||
'estimated_sqm': estimated_sqm,
|
||||
'model_output': model_output,
|
||||
'no_predictions': len(predictions) # cant serialize the predictions itself since its a tensor
|
||||
})
|
||||
|
||||
with open(self.path_floorplan_model_json(), 'w') as f:
|
||||
estimated_sqm, model_output, predictions = floorplan.calculate_model(
|
||||
floorplan_path
|
||||
)
|
||||
objs.append(
|
||||
{
|
||||
"floorplan_path": str(floorplan_path),
|
||||
"estimated_sqm": estimated_sqm,
|
||||
"model_output": model_output,
|
||||
"no_predictions": len(
|
||||
predictions
|
||||
), # cant serialize the predictions itself since its a tensor
|
||||
}
|
||||
)
|
||||
|
||||
with open(self.path_floorplan_model_json(), "w") as f:
|
||||
json.dump(objs, f)
|
||||
|
||||
|
||||
@property
|
||||
def sqm_model(self, recalculate=True):
|
||||
if not self.path_floorplan_model_json().exists() or recalculate:
|
||||
self.calculate_sqm_model()
|
||||
|
||||
|
||||
with open(self.path_floorplan_json()) as f:
|
||||
objs = json.load(f)
|
||||
|
||||
max_sqm = max([o['estimated_sqm'] for o in objs if o is None]) # filter out Nones
|
||||
|
||||
max_sqm = max(
|
||||
[o["estimated_sqm"] for o in objs if o is None]
|
||||
) # filter out Nones
|
||||
return max_sqm
|
||||
|
||||
|
||||
def calculate_sqm_ocr(self, recalculate=True):
|
||||
if not recalculate and self.path_floorplan_ocr_json().exists():
|
||||
return
|
||||
|
||||
|
||||
objs = []
|
||||
for floorplan_path in self.list_floorplans():
|
||||
estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
|
||||
objs.append({
|
||||
'floorplan_path': str(floorplan_path),
|
||||
'estimated_sqm': estimated_sqm,
|
||||
'text': model_output,
|
||||
})
|
||||
|
||||
with open(self.path_floorplan_ocr_json(), 'w') as f:
|
||||
objs.append(
|
||||
{
|
||||
"floorplan_path": str(floorplan_path),
|
||||
"estimated_sqm": estimated_sqm,
|
||||
"text": model_output,
|
||||
}
|
||||
)
|
||||
|
||||
with open(self.path_floorplan_ocr_json(), "w") as f:
|
||||
json.dump(objs, f)
|
||||
|
||||
|
||||
@property
|
||||
def sqm_ocr(self, recalculate=False):
|
||||
if not self.path_floorplan_ocr_json().exists() or recalculate:
|
||||
self.calculate_sqm_ocr()
|
||||
|
||||
|
||||
with open(self.path_floorplan_ocr_json()) as f:
|
||||
objs = json.load(f)
|
||||
|
||||
|
||||
sqms = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
|
||||
|
||||
sqms = [o["estimated_sqm"] for o in objs if o["estimated_sqm"] is not None]
|
||||
if len(sqms) == 0:
|
||||
return None
|
||||
max_sqm = max(sqms)
|
||||
return max_sqm
|
||||
|
||||
|
||||
def calculate_route(self, dest_lat: float, dest_lon: float, recalculate=False):
|
||||
if self.path_routing_json().exists() and not recalculate:
|
||||
return
|
||||
|
||||
result = routing.transit_route(self.latitude, self.longitude, dest_lat, dest_lon)
|
||||
with open(self.path_routing_json(), 'w') as f:
|
||||
|
||||
result = routing.transit_route(
|
||||
self.latitude, self.longitude, dest_lat, dest_lon
|
||||
)
|
||||
with open(self.path_routing_json(), "w") as f:
|
||||
json.dump(result, f)
|
||||
|
||||
|
||||
@property
|
||||
def travel_time(self) -> List:
|
||||
if not self.path_routing_json().exists():
|
||||
return []
|
||||
with open(self.path_routing_json()) as f:
|
||||
d = json.load(f)
|
||||
|
||||
|
||||
return routing.extract_time(d)
|
||||
|
||||
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
return f'https://www.rightmove.co.uk/properties/{self.identifier}'
|
||||
|
||||
return f"https://www.rightmove.co.uk/properties/{self.identifier}"
|
||||
|
||||
@property
|
||||
def detailobject(self):
|
||||
if self._cached is None:
|
||||
with open(self.path_detail_json()) as f:
|
||||
self._cached = json.load(f)
|
||||
return self._cached
|
||||
|
||||
|
||||
@property
|
||||
def price(self) -> float:
|
||||
return self.detailobject['property']['price']
|
||||
|
||||
return self.detailobject["property"]["price"]
|
||||
|
||||
@property
|
||||
def price_per_sqm(self) -> float:
|
||||
if self.sqm_ocr is None or self.sqm_ocr == 0:
|
||||
return None
|
||||
return self.price / self.sqm_ocr
|
||||
|
||||
|
||||
@property
|
||||
def bedrooms(self) -> int:
|
||||
return self.detailobject['property']['bedrooms']
|
||||
|
||||
return self.detailobject["property"]["bedrooms"]
|
||||
|
||||
@property
|
||||
def latitude(self) -> float:
|
||||
return self.detailobject['property']['latitude']
|
||||
|
||||
return self.detailobject["property"]["latitude"]
|
||||
|
||||
@property
|
||||
def longitude(self) -> float:
|
||||
return self.detailobject['property']['longitude']
|
||||
|
||||
return self.detailobject["property"]["longitude"]
|
||||
|
||||
@property
|
||||
def leaseLeft(self) -> int:
|
||||
ds = self.detailobject['property'].get('tenureInfo', {}).get('content', [])
|
||||
ds = self.detailobject["property"].get("tenureInfo", {}).get("content", [])
|
||||
for d in ds:
|
||||
if d['type'] == 'lengthOfLease':
|
||||
matches = re.findall(r'(\d+\.?\d*)', d['value'])
|
||||
if d["type"] == "lengthOfLease":
|
||||
matches = re.findall(r"(\d+\.?\d*)", d["value"])
|
||||
if len(matches):
|
||||
return float(matches[0])
|
||||
return None
|
||||
|
||||
|
||||
@property
|
||||
def development(self) -> bool:
|
||||
# aka new home
|
||||
return self.detailobject['property']['development']
|
||||
|
||||
return self.detailobject["property"]["development"]
|
||||
|
||||
def dict_nicely(self):
|
||||
return {
|
||||
'identifier': self.identifier,
|
||||
'sqm_ocr': self.sqm_ocr,
|
||||
'price': self.price,
|
||||
'price_per_sqm': self.price_per_sqm,
|
||||
'url': self.url,
|
||||
'bedrooms': self.bedrooms,
|
||||
'travel_time_fastest': self.travel_time[0],
|
||||
'travel_time_second': None if len(self.travel_time) < 2 else self.travel_time[1],
|
||||
'lease_left': self.leaseLeft,
|
||||
'development': self.development,
|
||||
"identifier": self.identifier,
|
||||
"sqm_ocr": self.sqm_ocr,
|
||||
"price": self.price,
|
||||
"price_per_sqm": self.price_per_sqm,
|
||||
"url": self.url,
|
||||
"bedrooms": self.bedrooms,
|
||||
"travel_time_fastest": self.travel_time[0],
|
||||
"travel_time_second": None
|
||||
if len(self.travel_time) < 2
|
||||
else self.travel_time[1],
|
||||
"lease_left": self.leaseLeft,
|
||||
"development": self.development,
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
listings = Listing.get_all_listings()
|
||||
print(listings[0].list_floorplans())
|
||||
|
|
|
|||
|
|
@ -1,32 +1,40 @@
|
|||
def record():
|
||||
from rec.query import listing_query, detail_query
|
||||
import json
|
||||
|
||||
|
||||
page = 1
|
||||
listing = listing_query(page, 2, 2, 5, 200000, 500000)
|
||||
with open(f'/Users/kadir/code/realestate/crawler/code/json/queries/listing{page}.json', 'w') as f:
|
||||
with open(
|
||||
f"/Users/kadir/code/realestate/crawler/code/json/queries/listing{page}.json",
|
||||
"w",
|
||||
) as f:
|
||||
json.dump(listing, f)
|
||||
|
||||
for prop in listing['properties']:
|
||||
identifier = prop['identifier']
|
||||
for prop in listing["properties"]:
|
||||
identifier = prop["identifier"]
|
||||
resp = detail_query(identifier)
|
||||
# print(identifier, resp.status_code)
|
||||
with open(f'/Users/kadir/code/realestate/crawler/code/json/queries/detail_{identifier}.json', 'w') as f:
|
||||
with open(
|
||||
f"/Users/kadir/code/realestate/crawler/code/json/queries/detail_{identifier}.json",
|
||||
"w",
|
||||
) as f:
|
||||
json.dump(resp, f)
|
||||
|
||||
|
||||
|
||||
def process():
|
||||
import json
|
||||
import pathlib
|
||||
path = pathlib.Path('/Users/kadir/code/realestate/crawler/code/json/queries/')
|
||||
|
||||
detailjsons = list(path.glob('detail_*json'))
|
||||
path = pathlib.Path("/Users/kadir/code/realestate/crawler/code/json/queries/")
|
||||
|
||||
detailjsons = list(path.glob("detail_*json"))
|
||||
for file in detailjsons:
|
||||
|
||||
with open(file) as f:
|
||||
js = json.load(f)
|
||||
|
||||
for floorplan in js['property']['floorplans']:
|
||||
print(floorplan['url'])
|
||||
|
||||
for floorplan in js["property"]["floorplans"]:
|
||||
print(floorplan["url"])
|
||||
|
||||
|
||||
# record()
|
||||
process()
|
||||
process()
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
import requests
|
||||
|
||||
headers = {
|
||||
'Host': 'media.rightmove.co.uk',
|
||||
"Host": "media.rightmove.co.uk",
|
||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||
'User-Agent': 'okhttp/4.10.0',
|
||||
"User-Agent": "okhttp/4.10.0",
|
||||
}
|
||||
|
||||
response = requests.get(
|
||||
'https://media.rightmove.co.uk/47k/46001/138680705/46001_32532509_IMG_00_0000.jpeg',
|
||||
"https://media.rightmove.co.uk/47k/46001/138680705/46001_32532509_IMG_00_0000.jpeg",
|
||||
headers=headers,
|
||||
verify=False,
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,58 +1,68 @@
|
|||
import requests
|
||||
|
||||
headers = {
|
||||
'Host': 'api.rightmove.co.uk',
|
||||
"Host": "api.rightmove.co.uk",
|
||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||
'User-Agent': 'okhttp/4.10.0',
|
||||
'Connection': 'close',
|
||||
"User-Agent": "okhttp/4.10.0",
|
||||
"Connection": "close",
|
||||
}
|
||||
|
||||
params = {
|
||||
'locationIdentifier': 'POSTCODE^4228216',
|
||||
'channel': 'BUY',
|
||||
'page': '1',
|
||||
'numberOfPropertiesPerPage': '25',
|
||||
'radius': '3.0',
|
||||
'sortBy': 'distance',
|
||||
'includeUnavailableProperties': 'false',
|
||||
'propertyTypes': 'flat',
|
||||
'mustHave': 'newHome', # added manually later
|
||||
'dontShow': 'sharedOwnership,retirement',
|
||||
'minPrice': '150000',
|
||||
'maxPrice': '500000',
|
||||
'minBedrooms': '2',
|
||||
'maxBedrooms': '2',
|
||||
'apiApplication': 'ANDROID',
|
||||
'appVersion': '3.70.0',
|
||||
"locationIdentifier": "POSTCODE^4228216",
|
||||
"channel": "BUY",
|
||||
"page": "1",
|
||||
"numberOfPropertiesPerPage": "25",
|
||||
"radius": "3.0",
|
||||
"sortBy": "distance",
|
||||
"includeUnavailableProperties": "false",
|
||||
"propertyTypes": "flat",
|
||||
"mustHave": "newHome", # added manually later
|
||||
"dontShow": "sharedOwnership,retirement",
|
||||
"minPrice": "150000",
|
||||
"maxPrice": "500000",
|
||||
"minBedrooms": "2",
|
||||
"maxBedrooms": "2",
|
||||
"apiApplication": "ANDROID",
|
||||
"appVersion": "3.70.0",
|
||||
}
|
||||
|
||||
response = requests.get('https://api.rightmove.co.uk/api/property-listing', params=params, headers=headers, verify=False)
|
||||
response = requests.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
params=params,
|
||||
headers=headers,
|
||||
verify=False,
|
||||
)
|
||||
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'Host': 'api.rightmove.co.uk',
|
||||
"Host": "api.rightmove.co.uk",
|
||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||
'User-Agent': 'okhttp/4.10.0',
|
||||
'Connection': 'close',
|
||||
"User-Agent": "okhttp/4.10.0",
|
||||
"Connection": "close",
|
||||
}
|
||||
|
||||
params = {
|
||||
'locationIdentifier': 'POSTCODE^4228216',
|
||||
'channel': 'BUY',
|
||||
'page': '2',
|
||||
'numberOfPropertiesPerPage': '25',
|
||||
'radius': '3.0',
|
||||
'sortBy': 'distance',
|
||||
'includeUnavailableProperties': 'false',
|
||||
'propertyTypes': 'flat',
|
||||
'dontShow': 'sharedOwnership,retirement',
|
||||
'minPrice': '150000',
|
||||
'maxPrice': '600000',
|
||||
'minBedrooms': '2',
|
||||
'maxBedrooms': '2',
|
||||
'apiApplication': 'ANDROID',
|
||||
'appVersion': '3.70.0',
|
||||
"locationIdentifier": "POSTCODE^4228216",
|
||||
"channel": "BUY",
|
||||
"page": "2",
|
||||
"numberOfPropertiesPerPage": "25",
|
||||
"radius": "3.0",
|
||||
"sortBy": "distance",
|
||||
"includeUnavailableProperties": "false",
|
||||
"propertyTypes": "flat",
|
||||
"dontShow": "sharedOwnership,retirement",
|
||||
"minPrice": "150000",
|
||||
"maxPrice": "600000",
|
||||
"minBedrooms": "2",
|
||||
"maxBedrooms": "2",
|
||||
"apiApplication": "ANDROID",
|
||||
"appVersion": "3.70.0",
|
||||
}
|
||||
|
||||
response = requests.get('https://api.rightmove.co.uk/api/property-listing', params=params, headers=headers, verify=False)
|
||||
response = requests.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
params=params,
|
||||
headers=headers,
|
||||
verify=False,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
import requests
|
||||
|
||||
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8'
|
||||
API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
|
||||
url = "https://maps.googleapis.com/maps/api/distancematrix/json"
|
||||
origin = '51.5636306598907,-0.11061106079085892'
|
||||
origin = "51.5636306598907,-0.11061106079085892"
|
||||
dest = "51.53836609846008,-0.12743940233824352"
|
||||
|
||||
params = {
|
||||
"origins": origin,
|
||||
"destinations": dest,
|
||||
"key": API_KEY,
|
||||
"departure_time": "", # timstamp, optional
|
||||
"mode": "transit",
|
||||
"origins": origin,
|
||||
"destinations": dest,
|
||||
"key": API_KEY,
|
||||
"departure_time": "", # timstamp, optional
|
||||
"mode": "transit",
|
||||
}
|
||||
|
||||
r = requests.get(url, params=params)
|
||||
|
|
@ -18,6 +18,5 @@ print(r.status_code)
|
|||
|
||||
print(r.json())
|
||||
|
||||
with open('code/json/routing_distancematrix.json', 'w') as f:
|
||||
f.write(r.text)
|
||||
|
||||
with open("code/json/routing_distancematrix.json", "w") as f:
|
||||
f.write(r.text)
|
||||
|
|
|
|||
|
|
@ -2,83 +2,77 @@ import requests
|
|||
from utils import nextMonday
|
||||
from collections import defaultdict
|
||||
|
||||
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8'
|
||||
API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
|
||||
url = "https://routes.googleapis.com/directions/v2:computeRoutes"
|
||||
|
||||
|
||||
def travel_time(origin_lat:float, origin_lon:float, dest_lat:float, dest_lon:float):
|
||||
monday9am = nextMonday()
|
||||
def travel_time(origin_lat: float, origin_lon: float, dest_lat: float, dest_lon: float):
|
||||
monday9am = nextMonday()
|
||||
|
||||
header = {
|
||||
"X-Goog-Api-Key": API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||
}
|
||||
header = {
|
||||
"X-Goog-Api-Key": API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||
}
|
||||
|
||||
body = {
|
||||
"origin":{
|
||||
"location":{
|
||||
"latLng":{
|
||||
"latitude": origin_lat,
|
||||
"longitude": origin_lon
|
||||
}
|
||||
}
|
||||
},
|
||||
"destination":{
|
||||
"location":{
|
||||
"latLng":{
|
||||
"latitude": dest_lat,
|
||||
"longitude": dest_lon
|
||||
}
|
||||
}
|
||||
},
|
||||
"travelMode": "TRANSIT",
|
||||
# "2023-10-15T15:01:23.045123456Z"
|
||||
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
"computeAlternativeRoutes": False,
|
||||
# "routeModifiers": {
|
||||
# "avoidTolls": false,
|
||||
# "avoidHighways": false,
|
||||
# "avoidFerries": false
|
||||
# },
|
||||
"languageCode": "en-US",
|
||||
"units": "METRIC"
|
||||
}
|
||||
body = {
|
||||
"origin": {
|
||||
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
|
||||
},
|
||||
"destination": {
|
||||
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
|
||||
},
|
||||
"travelMode": "TRANSIT",
|
||||
# "2023-10-15T15:01:23.045123456Z"
|
||||
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
"computeAlternativeRoutes": False,
|
||||
# "routeModifiers": {
|
||||
# "avoidTolls": false,
|
||||
# "avoidHighways": false,
|
||||
# "avoidFerries": false
|
||||
# },
|
||||
"languageCode": "en-US",
|
||||
"units": "METRIC",
|
||||
}
|
||||
|
||||
r = requests.post(url, json=body, headers=header)
|
||||
if r.status_code == 200:
|
||||
return r.json()
|
||||
|
||||
raise Exception(r.json())
|
||||
|
||||
r = requests.post(url, json=body, headers=header)
|
||||
if r.status_code == 200:
|
||||
return r.json()
|
||||
|
||||
raise Exception(r.json())
|
||||
|
||||
def extract_time(d):
|
||||
r = d['routes'][0]
|
||||
print(r.keys())
|
||||
distance = r['distanceMeters']
|
||||
duration = r['duration']
|
||||
duration_static = r['staticDuration']
|
||||
|
||||
steps = r['legs'][0]['steps']
|
||||
# print(steps)
|
||||
duration_per_transit = defaultdict(lambda: 0)
|
||||
distance_per_transit = defaultdict(lambda: 0)
|
||||
|
||||
for step in steps:
|
||||
duration_per_transit[step['travelMode']] += int(step['staticDuration'].strip('s'))
|
||||
distance_per_transit[step['travelMode']] += step.get('distanceMeters', 0)
|
||||
|
||||
|
||||
print(f"dis {distance}, dur {duration}, duration per transit {dict(duration_per_transit)}, distance per transit {dict(distance_per_transit)}")
|
||||
|
||||
r = d["routes"][0]
|
||||
print(r.keys())
|
||||
distance = r["distanceMeters"]
|
||||
duration = r["duration"]
|
||||
duration_static = r["staticDuration"]
|
||||
|
||||
steps = r["legs"][0]["steps"]
|
||||
# print(steps)
|
||||
duration_per_transit = defaultdict(lambda: 0)
|
||||
distance_per_transit = defaultdict(lambda: 0)
|
||||
|
||||
for step in steps:
|
||||
duration_per_transit[step["travelMode"]] += int(
|
||||
step["staticDuration"].strip("s")
|
||||
)
|
||||
distance_per_transit[step["travelMode"]] += step.get("distanceMeters", 0)
|
||||
|
||||
print(
|
||||
f"dis {distance}, dur {duration}, duration per transit {dict(duration_per_transit)}, distance per transit {dict(distance_per_transit)}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import json
|
||||
with open('code/json/routing_routeapi.json', 'r') as f:
|
||||
d = json.load(f)
|
||||
|
||||
extract_time(d)
|
||||
|
||||
|
||||
import json
|
||||
|
||||
with open("code/json/routing_routeapi.json", "r") as f:
|
||||
d = json.load(f)
|
||||
|
||||
extract_time(d)
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# origin = 51.5635664310333, -0.1107173751570373 # home
|
||||
|
|
@ -87,4 +81,3 @@ if __name__ == "__main__":
|
|||
# import json
|
||||
# with open('code/json/routing_routeapi.json', 'w') as f:
|
||||
# json.dump(d, f)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,20 @@
|
|||
import requests
|
||||
|
||||
headers = {
|
||||
'Host': 'api.rightmove.co.uk',
|
||||
"Host": "api.rightmove.co.uk",
|
||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||
'User-Agent': 'okhttp/4.10.0',
|
||||
'Connection': 'close',
|
||||
"User-Agent": "okhttp/4.10.0",
|
||||
"Connection": "close",
|
||||
}
|
||||
|
||||
params = {
|
||||
'apiApplication': 'ANDROID',
|
||||
'appVersion': '3.70.0',
|
||||
"apiApplication": "ANDROID",
|
||||
"appVersion": "3.70.0",
|
||||
}
|
||||
|
||||
response = requests.get('https://api.rightmove.co.uk/api/property/119578451', params=params, headers=headers, verify=False)
|
||||
response = requests.get(
|
||||
"https://api.rightmove.co.uk/api/property/119578451",
|
||||
params=params,
|
||||
headers=headers,
|
||||
verify=False,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,21 +3,22 @@ from PIL import Image
|
|||
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
|
||||
import pytesseract
|
||||
|
||||
|
||||
def inference(image_path):
|
||||
image = Image.open(image_path)
|
||||
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
|
||||
processor = Pix2StructProcessor.from_pretrained('google/deplot')
|
||||
model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot')
|
||||
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
|
||||
processor = Pix2StructProcessor.from_pretrained("google/deplot")
|
||||
model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
|
||||
|
||||
inputs = processor(images=image, text=question, return_tensors="pt")
|
||||
predictions = model.generate(**inputs, max_new_tokens=512)
|
||||
output = processor.decode(predictions[0], skip_special_tokens=True)
|
||||
|
||||
|
||||
return output, predictions
|
||||
|
||||
|
||||
|
||||
def extract_total_sqm(deplot_input_str):
|
||||
sqmregex = r'(\d+\.\d*) ?(sq ?m|sq. ?m)'
|
||||
sqmregex = r"(\d+\.\d*) ?(sq ?m|sq. ?m)"
|
||||
matches = re.findall(sqmregex, deplot_input_str.lower())
|
||||
if len(matches) == 0:
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@ headers = {
|
|||
|
||||
def detail_query(detail_id: int):
|
||||
params = {
|
||||
'apiApplication': 'ANDROID',
|
||||
'appVersion': '3.70.0',
|
||||
"apiApplication": "ANDROID",
|
||||
"appVersion": "3.70.0",
|
||||
}
|
||||
url = f'https://api.rightmove.co.uk/api/property/{detail_id}'
|
||||
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
||||
response = requests.get(url, params=params, headers=headers, verify=False)
|
||||
if response.status_code != 200:
|
||||
raise Exception("Failed due to: ", response.text)
|
||||
|
|
@ -30,7 +30,16 @@ def detail_query(detail_id: int):
|
|||
|
||||
|
||||
# @cache.memoize()
|
||||
def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float, min_price: int, max_price: int, mustNewHome: bool = False, max_days_since_added: int = None) -> dict:
|
||||
def listing_query(
|
||||
page: int,
|
||||
min_bedrooms: int,
|
||||
max_bedrooms: int,
|
||||
radius: float,
|
||||
min_price: int,
|
||||
max_price: int,
|
||||
mustNewHome: bool = False,
|
||||
max_days_since_added: int = None,
|
||||
) -> dict:
|
||||
params = {
|
||||
"locationIdentifier": "POSTCODE^4228216",
|
||||
"channel": "BUY",
|
||||
|
|
@ -49,12 +58,12 @@ def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float
|
|||
"appVersion": "3.70.0",
|
||||
}
|
||||
if max_days_since_added:
|
||||
if max_days_since_added not in [1,3,7,14]:
|
||||
raise Exception("Invalid max days. Can only be", [1,3,7,14])
|
||||
params['maxDaysSinceAdded'] = max_days_since_added
|
||||
|
||||
if max_days_since_added not in [1, 3, 7, 14]:
|
||||
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
|
||||
params["maxDaysSinceAdded"] = max_days_since_added
|
||||
|
||||
if mustNewHome:
|
||||
params['mustHave'] = 'newHome'
|
||||
params["mustHave"] = "newHome"
|
||||
|
||||
response = requests.get(
|
||||
"https://api.rightmove.co.uk/api/property-listing",
|
||||
|
|
@ -69,7 +78,14 @@ def listing_query(page: int, min_bedrooms: int, max_bedrooms: int, radius: float
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
response = listing_query(page=1, min_bedrooms=2, max_bedrooms=2, radius=5.0, min_price=150000, max_price=700000)
|
||||
response = listing_query(
|
||||
page=1,
|
||||
min_bedrooms=2,
|
||||
max_bedrooms=2,
|
||||
radius=5.0,
|
||||
min_price=150000,
|
||||
max_price=700000,
|
||||
)
|
||||
resp = response
|
||||
for d in resp["properties"]:
|
||||
rl = RightmoveListing(
|
||||
|
|
|
|||
|
|
@ -2,100 +2,101 @@ import requests
|
|||
from rec.utils import nextMonday
|
||||
from collections import defaultdict
|
||||
|
||||
API_KEY = 'AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8'
|
||||
API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8"
|
||||
url = "https://routes.googleapis.com/directions/v2:computeRoutes"
|
||||
|
||||
|
||||
def transit_route(origin_lat:float, origin_lon:float, dest_lat:float, dest_lon:float, compute_alternative_routes=True):
|
||||
monday9am = nextMonday()
|
||||
def transit_route(
|
||||
origin_lat: float,
|
||||
origin_lon: float,
|
||||
dest_lat: float,
|
||||
dest_lon: float,
|
||||
compute_alternative_routes=True,
|
||||
):
|
||||
monday9am = nextMonday()
|
||||
|
||||
header = {
|
||||
"X-Goog-Api-Key": API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||
}
|
||||
header = {
|
||||
"X-Goog-Api-Key": API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||
}
|
||||
|
||||
body = {
|
||||
"origin":{
|
||||
"location":{
|
||||
"latLng":{
|
||||
"latitude": origin_lat,
|
||||
"longitude": origin_lon
|
||||
}
|
||||
}
|
||||
},
|
||||
"destination":{
|
||||
"location":{
|
||||
"latLng":{
|
||||
"latitude": dest_lat,
|
||||
"longitude": dest_lon
|
||||
}
|
||||
}
|
||||
},
|
||||
"travelMode": "TRANSIT",
|
||||
# "2023-10-15T15:01:23.045123456Z"
|
||||
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
"computeAlternativeRoutes": compute_alternative_routes,
|
||||
# "routeModifiers": {
|
||||
# "avoidTolls": false,
|
||||
# "avoidHighways": false,
|
||||
# "avoidFerries": false
|
||||
# },
|
||||
"languageCode": "en-US",
|
||||
"units": "METRIC"
|
||||
}
|
||||
body = {
|
||||
"origin": {
|
||||
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
|
||||
},
|
||||
"destination": {
|
||||
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
|
||||
},
|
||||
"travelMode": "TRANSIT",
|
||||
# "2023-10-15T15:01:23.045123456Z"
|
||||
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
"computeAlternativeRoutes": compute_alternative_routes,
|
||||
# "routeModifiers": {
|
||||
# "avoidTolls": false,
|
||||
# "avoidHighways": false,
|
||||
# "avoidFerries": false
|
||||
# },
|
||||
"languageCode": "en-US",
|
||||
"units": "METRIC",
|
||||
}
|
||||
|
||||
r = requests.post(url, json=body, headers=header)
|
||||
if r.status_code == 200:
|
||||
return r.json()
|
||||
|
||||
raise Exception(r.json())
|
||||
r = requests.post(url, json=body, headers=header)
|
||||
if r.status_code == 200:
|
||||
return r.json()
|
||||
|
||||
raise Exception(r.json())
|
||||
|
||||
|
||||
def extract_time(d, limit: int = 2):
    """Summarise the first ``limit`` routes of a decoded routing response.

    Args:
        d: Decoded JSON response. Each entry of ``d["routes"]`` must provide
            ``distanceMeters``, ``duration``, ``staticDuration`` and
            ``legs[0]["steps"]``; durations are strings of whole seconds with
            a trailing ``"s"``. A response without a ``"routes"`` key is
            treated as "no routes" rather than raising ``KeyError``.
        limit: Maximum number of routes to summarise (applied as a slice).

    Returns:
        A list with one dict per summarised route, holding the keys
        ``duration``, ``distance``, ``duration_static``,
        ``initial_walk_duration``, ``duration_per_transit``,
        ``distance_per_transit`` and ``number_of_transit_stops``.
    """

    def seconds(value):
        # Durations arrive as e.g. "1234s"; drop the unit and parse.
        return int(value.strip("s"))

    res = []
    # Slice before parsing so routes beyond ``limit`` are never processed.
    for route in d.get("routes", [])[:limit]:
        distance = route["distanceMeters"]
        duration = seconds(route["duration"])
        duration_static = seconds(route["staticDuration"])

        # Only the first leg of each route is inspected.
        steps = route["legs"][0]["steps"]
        initial_walk_duration = 0
        used_transit = False
        duration_per_transit = defaultdict(int)
        distance_per_transit = defaultdict(int)
        number_of_transit_stops = 0

        for step in steps:
            mode = step["travelMode"]
            step_seconds = seconds(step["staticDuration"])

            # Walking counts as "initial" only until the first non-walk step.
            if not used_transit and mode == "WALK":
                initial_walk_duration += step_seconds
            else:
                used_transit = True

            # Per-mode totals cover every step, including the initial walk.
            duration_per_transit[mode] += step_seconds
            # A step may omit distanceMeters; count it as zero.
            distance_per_transit[mode] += step.get("distanceMeters", 0)
            if mode == "TRANSIT":
                number_of_transit_stops += 1

        res.append(
            {
                "duration": duration,
                "distance": distance,
                "duration_static": duration_static,
                "initial_walk_duration": initial_walk_duration,
                "duration_per_transit": dict(duration_per_transit),
                "distance_per_transit": dict(distance_per_transit),
                "number_of_transit_stops": number_of_transit_stops,
            }
        )

    return res
|
||||
|
||||
def extract_time(d, limit: int = 2):
    """Return per-route timing summaries for at most ``limit`` routes.

    ``d`` is a decoded routing response. For every route the function reads
    ``distanceMeters``, ``duration``, ``staticDuration`` and the steps of the
    first leg. Duration values are strings such as ``'600s'``. A response
    with no ``'routes'`` key produces an empty list instead of ``KeyError``.

    Each summary dict contains ``duration``, ``distance``,
    ``duration_static``, ``initial_walk_duration``, ``duration_per_transit``,
    ``distance_per_transit`` and ``number_of_transit_stops``.
    """
    res = []
    # Take the slice up front: routes past ``limit`` are never needed.
    wanted_routes = d.get('routes', [])[:limit]

    for route in wanted_routes:
        initial_walk_duration = 0
        number_of_transit_stops = 0
        used_transit = False
        duration_per_transit = defaultdict(int)
        distance_per_transit = defaultdict(int)

        # Only the first leg is examined.
        for step in route['legs'][0]['steps']:
            travel_mode = step['travelMode']
            static_seconds = int(step['staticDuration'].strip('s'))

            if travel_mode != 'WALK':
                # Any non-walk step ends the initial walking phase.
                used_transit = True
            elif not used_transit:
                initial_walk_duration += static_seconds

            # Totals per travel mode include the initial walk as well.
            duration_per_transit[travel_mode] += static_seconds
            # distanceMeters can be absent on a step; treat that as zero.
            distance_per_transit[travel_mode] += step.get('distanceMeters', 0)
            if travel_mode == 'TRANSIT':
                number_of_transit_stops += 1

        res.append({
            'duration': int(route['duration'].strip('s')),
            'distance': route['distanceMeters'],
            'duration_static': int(route['staticDuration'].strip('s')),
            'initial_walk_duration': initial_walk_duration,
            'duration_per_transit': dict(duration_per_transit),
            'distance_per_transit': dict(distance_per_transit),
            'number_of_transit_stops': number_of_transit_stops,
        })

    return res
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: feed a previously saved routing response from disk
    # through extract_time. The result is not printed or checked.
    import json

    with open("code/json/routing_routeapi.json", "r") as saved_file:
        saved_response = json.load(saved_file)

    extract_time(saved_response)
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# origin = 51.5635664310333, -0.1107173751570373 # home
|
||||
|
|
@ -104,4 +105,3 @@ if __name__ == "__main__":
|
|||
# import json
|
||||
# with open('code/json/routing_routeapi.json', 'w') as f:
|
||||
# json.dump(d, f)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
|
||||
def nextMonday():
|
||||
"""
|
||||
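I think this function doesn't work when the day is Monday itself: the day offset is then 0, so it returns that same day's 09:00 UTC even if that time has already passed.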
|
||||
|
|
@ -10,8 +11,11 @@ def nextMonday():
|
|||
now = datetime.now(timezone.utc)
|
||||
days_until_monday = (0 - now.weekday() + 7) % 7
|
||||
monday = now + timedelta(days=days_until_monday)
|
||||
monday_9am = monday.replace(hour=9, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
|
||||
monday_9am = monday.replace(
|
||||
hour=9, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
|
||||
)
|
||||
return monday_9am
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(nextMonday())
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
from rec.db import RightmoveListing, session
|
||||
from sqlalchemy import select
|
||||
|
||||
if __name__ == "__main__":
    # Ad-hoc query check: select RightmoveListing rows with a price below
    # 600000 and dump whatever the session returns. "x" / "y" are progress
    # markers printed before building and before executing the statement.
    print("x")
    stmt = select(RightmoveListing).where(RightmoveListing.price < 600000)
    print("y")
    rows = list(session.execute(stmt))
    print(rows)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue