145 lines
4.5 KiB
Python
145 lines
4.5 KiB
Python
from dataclasses import dataclass, field
import json
import pathlib
from typing import Dict, List, Optional

from rec import floorplan
|
|
|
|
# Root folder of the on-disk data. Listings are discovered here by globbing
# '*/listing.json', and each listing's own files live in the sub-folder
# named after its identifier.
_DATA_DIR = pathlib.Path('data/rs/')
|
|
|
|
@dataclass
class Listing:
    """A property listing whose files live under ``_DATA_DIR/<identifier>/``.

    The ``path_*`` methods only build paths; they never touch the disk.
    The ``sqm_*`` properties read (and, when required, first write) JSON
    result files that sit next to the listing's other files.
    """

    identifier: int
    # Lazily loaded content of detail.json (see ``detailobject``). It is a
    # cache, not identity, so it is kept out of repr() and ==.
    _cached: Optional[Dict] = field(default=None, repr=False, compare=False)

    @staticmethod
    def get_all_listings() -> List['Listing']:
        """Return a Listing for every ``*/listing.json`` below ``_DATA_DIR``.

        The result is ordered by the path of the ``listing.json`` files.

        Raises:
            KeyError: if a ``listing.json`` has no ``'identifier'`` entry.
        """
        listings = []
        for listing_path in sorted(_DATA_DIR.glob('*/listing.json')):
            with open(listing_path) as f:
                d = json.load(f)
            listings.append(Listing(d['identifier']))
        return listings

    def path_listing(self) -> pathlib.Path:
        """Return the folder that holds all files of this listing."""
        return _DATA_DIR / str(self.identifier)

    def path_listing_json(self) -> pathlib.Path:
        """Return the path of this listing's ``listing.json``."""
        return self.path_listing() / 'listing.json'

    def path_detail_json(self) -> pathlib.Path:
        """Return the path of this listing's ``detail.json``."""
        return self.path_listing() / 'detail.json'

    def path_floorplan_model_json(self) -> pathlib.Path:
        """Return the path of the file written by ``calculate_sqm_model``."""
        return self.path_listing() / 'floorplan_model.json'

    def path_floorplan_ocr_json(self) -> pathlib.Path:
        """Return the path of the file written by ``calculate_sqm_ocr``."""
        return self.path_listing() / 'floorplan_ocr.json'

    def path_pic_folder(self) -> pathlib.Path:
        """Return the folder that holds this listing's pictures."""
        return self.path_listing() / 'pics'

    def path_pic_file(self, order, name) -> pathlib.Path:
        """Return the path ``<pics folder>/<order>_<name>``.

        This used to be a second definition of ``path_pic_folder`` that
        shadowed the zero-argument one and then called itself without
        arguments, so neither form could be called without a TypeError.
        """
        return self.path_pic_folder() / f'{order}_{name}'

    def path_floorplan_folder(self) -> pathlib.Path:
        """Return the folder that holds this listing's floorplan images."""
        return self.path_listing() / 'floorplans'

    def path_floorplan_file(self, order, name) -> pathlib.Path:
        """Return the path ``<floorplans folder>/<order>_<name>``."""
        # Must be the floorplans folder; it previously pointed into 'pics'.
        return self.path_floorplan_folder() / f'{order}_{name}'

    def list_floorplans(self):
        """Return every entry found directly inside the floorplans folder."""
        images = list(self.path_floorplan_folder().glob('*'))
        # todo add check if return is image
        return images

    def calculate_sqm_model(self):
        """Run ``floorplan.calculate_model`` on each floorplan and store the results.

        One record per floorplan is written to ``path_floorplan_model_json()``,
        replacing any previous content.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output, predictions = floorplan.calculate_model(floorplan_path)
            objs.append({
                'floorplan_path': str(floorplan_path),
                'estimated_sqm': estimated_sqm,
                'model_output': model_output,
                # Only the count is stored: the predictions themselves are a
                # tensor and cannot be serialized to JSON.
                'no_predictions': len(predictions),
            })

        with open(self.path_floorplan_model_json(), 'w') as f:
            json.dump(objs, f)

    @staticmethod
    def _max_estimated_sqm(objs):
        """Return the largest non-None ``'estimated_sqm'`` in *objs*, or None."""
        sqms = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
        return max(sqms, default=None)

    @property
    def sqm_model(self, recalculate=True):
        """Largest model-estimated area over all floorplans, or None if there is none.

        NOTE(review): a property getter is only ever called with ``self``, so
        ``recalculate`` always takes its default and the model is re-run on
        every access (``sqm_ocr`` defaults to False) -- confirm this is intended.
        """
        if recalculate or not self.path_floorplan_model_json().exists():
            self.calculate_sqm_model()

        with open(self.path_floorplan_model_json()) as f:
            objs = json.load(f)

        return self._max_estimated_sqm(objs)

    def calculate_sqm_ocr(self):
        """Run ``floorplan.calculate_ocr`` on each floorplan and store the results.

        One record per floorplan is written to ``path_floorplan_ocr_json()``,
        replacing any previous content.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
            objs.append({
                'floorplan_path': str(floorplan_path),
                'estimated_sqm': estimated_sqm,
                'text': model_output,
            })

        with open(self.path_floorplan_ocr_json(), 'w') as f:
            json.dump(objs, f)

    @property
    def sqm_ocr(self, recalculate=False):
        """Largest OCR-estimated area over all floorplans, or None if there is none.

        The OCR results file is created first when it does not exist yet.
        """
        if recalculate or not self.path_floorplan_ocr_json().exists():
            self.calculate_sqm_ocr()

        with open(self.path_floorplan_ocr_json()) as f:
            objs = json.load(f)

        return self._max_estimated_sqm(objs)

    @property
    def url(self):
        """Web address of this listing, built from its identifier."""
        return f'https://www.rightmove.co.uk/properties/{self.identifier}'

    @property
    def detailobject(self):
        """Parsed content of ``detail.json``, read once and then kept in memory."""
        if self._cached is None:
            with open(self.path_detail_json()) as f:
                self._cached = json.load(f)
        return self._cached

    @property
    def price(self) -> float:
        """The ``['property']['price']`` entry of the detail data."""
        return self.detailobject['property']['price']

    @property
    def price_per_sqm(self) -> Optional[float]:
        """Price divided by the OCR-estimated area, or None without a usable area."""
        # Read the property once: every access re-reads the OCR results file.
        sqm = self.sqm_ocr
        if not sqm:
            # None (no estimate) or 0 (division would fail): no meaningful ratio.
            return None
        return self.price / sqm

    def dict_nicely(self):
        """Return the area, price, price per sqm and URL of this listing as a dict."""
        return {
            'sqm_ocr': self.sqm_ocr,
            'price': self.price,
            'price_per_sqm': self.price_per_sqm,
            'url': self.url,
        }
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke check: print the floorplan files of the first listing on disk.
    first_listing = Listing.get_all_listings()[0]
    floorplan_files = first_listing.list_floorplans()
    print(floorplan_files)
|