"""File-backed access to property listings stored under ``data/rs/<identifier>/``."""
from dataclasses import dataclass, field
import json
import pathlib
import re
from typing import Dict, List, Optional

from rec import floorplan, routing

_DATA_DIR = pathlib.Path('data/rs/')


@dataclass()
class Listing():
    """A single listing whose data lives in ``_DATA_DIR / str(identifier)``.

    Most attributes are lazy properties that read JSON files from the
    listing's directory; some of them compute and persist derived data
    (floorplan area estimates, routing) on first access.
    """

    identifier: int
    # Lazily loaded contents of detail.json. Excluded from repr/eq so that two
    # listings with the same identifier stay equal regardless of cache state.
    _cached: Optional[Dict] = field(default=None, repr=False, compare=False)

    @staticmethod
    def get_all_listings() -> List['Listing']:
        """Return a ``Listing`` for every ``*/listing.json`` below ``_DATA_DIR``.

        Files are visited in sorted path order; the identifier is taken from
        the ``identifier`` key of each JSON file.
        """
        listings = []
        for listing_path in sorted(_DATA_DIR.glob('*/listing.json')):
            with open(listing_path) as f:
                d = json.load(f)
            listings.append(Listing(d['identifier']))
        return listings

    # ------------------------------------------------------------------
    # Paths
    # ------------------------------------------------------------------

    def path_listing(self) -> pathlib.Path:
        """Return this listing's directory, creating it if it does not exist."""
        p = _DATA_DIR / str(self.identifier)
        p.mkdir(parents=True, exist_ok=True)
        return p

    def path_listing_json(self) -> pathlib.Path:
        """Return the path of ``listing.json``."""
        return self.path_listing() / 'listing.json'

    def path_detail_json(self) -> pathlib.Path:
        """Return the path of ``detail.json``."""
        return self.path_listing() / 'detail.json'

    def path_routing_json(self) -> pathlib.Path:
        """Return the path of ``routing.json``."""
        return self.path_listing() / 'routing.json'

    def path_floorplan_model_json(self) -> pathlib.Path:
        """Return the path of ``floorplan_model.json``."""
        return self.path_listing() / 'floorplan_model.json'

    def path_floorplan_ocr_json(self) -> pathlib.Path:
        """Return the path of ``floorplan_ocr.json``."""
        return self.path_listing() / 'floorplan_ocr.json'

    def path_pic_folder(self) -> pathlib.Path:
        """Return the path of the ``pics`` folder (not created here)."""
        return self.path_listing() / 'pics'

    def path_pic_file(self, order, name) -> pathlib.Path:
        """Return ``pics/<order>_<name>``, creating the ``pics`` folder if needed."""
        self.path_pic_folder().mkdir(parents=True, exist_ok=True)
        return self.path_pic_folder() / f'{order}_{name}'

    def path_floorplan_folder(self) -> pathlib.Path:
        """Return the path of the ``floorplans`` folder (not created here)."""
        return self.path_listing() / 'floorplans'

    def path_floorplan_file(self, order, name) -> pathlib.Path:
        """Return ``floorplans/<order>_<name>``, creating the folder if needed."""
        self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
        return self.path_floorplan_folder() / f'{order}_{name}'

    def list_floorplans(self):
        """Return every entry inside the ``floorplans`` folder as a list of paths."""
        images = list(self.path_floorplan_folder().glob('*'))
        # TODO: check that each returned entry is actually an image
        return images

    # ------------------------------------------------------------------
    # Floorplan area estimates
    # ------------------------------------------------------------------

    @staticmethod
    def _max_estimated_sqm(json_path: pathlib.Path) -> Optional[float]:
        """Return the largest non-null ``estimated_sqm`` in *json_path*, or None."""
        with open(json_path) as f:
            objs = json.load(f)
        sqms = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
        return max(sqms) if sqms else None

    def calculate_sqm_model(self):
        """Run ``floorplan.calculate_model`` on each floorplan and save the results.

        One record per floorplan is written to ``floorplan_model.json``.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output, predictions = floorplan.calculate_model(floorplan_path)
            objs.append({
                'floorplan_path': str(floorplan_path),
                'estimated_sqm': estimated_sqm,
                'model_output': model_output,
                # can't serialize the predictions themselves since they are a tensor
                'no_predictions': len(predictions),
            })
        with open(self.path_floorplan_model_json(), 'w') as f:
            json.dump(objs, f)

    @property
    def sqm_model(self, recalculate=True):
        """Largest model-estimated area over all floorplans, or None if there is none.

        NOTE(review): attribute access never passes ``recalculate``, so with
        the default of True the model is re-run on every access. Kept as-is
        to avoid changing behaviour; consider defaulting to False like
        ``sqm_ocr``.
        """
        if not self.path_floorplan_model_json().exists() or recalculate:
            self.calculate_sqm_model()
        return self._max_estimated_sqm(self.path_floorplan_model_json())

    def calculate_sqm_ocr(self, recalculate=True):
        """Run ``floorplan.calculate_ocr`` on each floorplan and save the results.

        Does nothing when ``recalculate`` is False and ``floorplan_ocr.json``
        already exists.
        """
        if not recalculate and self.path_floorplan_ocr_json().exists():
            return
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
            objs.append({
                'floorplan_path': str(floorplan_path),
                'estimated_sqm': estimated_sqm,
                'text': model_output,
            })
        with open(self.path_floorplan_ocr_json(), 'w') as f:
            json.dump(objs, f)

    @property
    def sqm_ocr(self, recalculate=False):
        """Largest OCR-estimated area over all floorplans, or None if there is none.

        The OCR results are computed on first access and read back from
        ``floorplan_ocr.json`` afterwards.
        """
        if not self.path_floorplan_ocr_json().exists() or recalculate:
            self.calculate_sqm_ocr()
        return self._max_estimated_sqm(self.path_floorplan_ocr_json())

    # ------------------------------------------------------------------
    # Routing
    # ------------------------------------------------------------------

    def calculate_route(self, dest_lat: float, dest_lon: float, recalculate=False):
        """Compute a transit route from this listing to the destination and save it.

        Does nothing when ``routing.json`` already exists and ``recalculate``
        is False.
        """
        if self.path_routing_json().exists() and not recalculate:
            return
        result = routing.transit_route(self.latitude, self.longitude, dest_lat, dest_lon)
        with open(self.path_routing_json(), 'w') as f:
            json.dump(result, f)

    @property
    def travel_time(self) -> List:
        """Return ``routing.extract_time`` of the stored route, or ``[]`` if none is stored."""
        if not self.path_routing_json().exists():
            return []
        with open(self.path_routing_json()) as f:
            d = json.load(f)
        return routing.extract_time(d)

    # ------------------------------------------------------------------
    # Listing details
    # ------------------------------------------------------------------

    @property
    def url(self):
        """Public URL of the listing."""
        return f'https://www.rightmove.co.uk/properties/{self.identifier}'

    @property
    def detailobject(self):
        """Parsed contents of ``detail.json``, loaded once and cached on the instance."""
        if self._cached is None:
            with open(self.path_detail_json()) as f:
                self._cached = json.load(f)
        return self._cached

    @property
    def price(self) -> float:
        """Value of ``property.price`` in the detail JSON."""
        return self.detailobject['property']['price']

    @property
    def price_per_sqm(self) -> Optional[float]:
        """Price divided by the OCR-estimated area, or None if the area is missing or 0."""
        # Read once: every access to sqm_ocr hits the disk (and may run OCR).
        sqm = self.sqm_ocr
        if sqm is None or sqm == 0:
            return None
        return self.price / sqm

    @property
    def bedrooms(self) -> int:
        """Value of ``property.bedrooms`` in the detail JSON."""
        return self.detailobject['property']['bedrooms']

    @property
    def latitude(self) -> float:
        """Value of ``property.latitude`` in the detail JSON."""
        return self.detailobject['property']['latitude']

    @property
    def longitude(self) -> float:
        """Value of ``property.longitude`` in the detail JSON."""
        return self.detailobject['property']['longitude']

    @property
    def leaseLeft(self) -> Optional[float]:
        """First number found in the ``lengthOfLease`` tenure entry, or None.

        Looks through ``property.tenureInfo.content`` for an entry whose
        ``type`` is ``lengthOfLease`` and parses the first decimal number in
        its ``value``.
        """
        # `or` fallbacks also cover keys that are present but null.
        tenure_info = self.detailobject['property'].get('tenureInfo') or {}
        ds = tenure_info.get('content') or []
        for d in ds:
            if d['type'] == 'lengthOfLease':
                matches = re.findall(r'(\d+\.?\d*)', d['value'])
                if matches:
                    return float(matches[0])
        return None

    @property
    def development(self) -> bool:
        """Value of ``property.development`` in the detail JSON (aka new home)."""
        return self.detailobject['property']['development']

    def dict_nicely(self):
        """Return a flat summary dict of the listing's main attributes.

        ``travel_time_fastest`` / ``travel_time_second`` are None when fewer
        than one / two travel times are available.
        """
        # Read once: travel_time re-reads routing.json on every access.
        times = self.travel_time
        return {
            'sqm_ocr': self.sqm_ocr,
            'price': self.price,
            'price_per_sqm': self.price_per_sqm,
            'url': self.url,
            'bedrooms': self.bedrooms,
            'travel_time_fastest': None if len(times) < 1 else times[0],
            'travel_time_second': None if len(times) < 2 else times[1],
            'lease_left': self.leaseLeft,
            'development': self.development,
        }


if __name__ == '__main__':
    listings = Listing.get_all_listings()
    if listings:
        print(listings[0].list_floorplans())
    else:
        print(f'no listings found under {_DATA_DIR}')