from dataclasses import dataclass
import json
import pathlib
from typing import List

from rec import floorplan

# Root directory that holds one sub-directory per listing.
_DATA_DIR = pathlib.Path('data/rs/')


@dataclass()
class Listing():
    """A listing stored on disk under ``_DATA_DIR / <identifier>``.

    The class exposes helpers that build the paths of the listing's files
    and that compute/cache square-metre estimates derived from its
    floorplan images via the ``rec.floorplan`` module.
    """

    identifier: int

    @staticmethod
    def get_all_listings() -> List['Listing']:
        """Return a ``Listing`` for every ``*/listing.json`` under ``_DATA_DIR``.

        Files are visited in sorted path order; the identifier is read from
        the ``'identifier'`` key of each JSON document.
        """
        listings = []
        for listing_path in sorted(_DATA_DIR.glob('*/listing.json')):
            with open(listing_path, encoding='utf-8') as f:
                d = json.load(f)
            listings.append(Listing(d['identifier']))
        return listings

    def path_listing(self) -> pathlib.Path:
        """Return the directory of this listing."""
        return _DATA_DIR / str(self.identifier)

    def path_listing_json(self) -> pathlib.Path:
        """Return the path of the listing's ``listing.json``."""
        return self.path_listing() / 'listing.json'

    def path_detail_json(self) -> pathlib.Path:
        """Return the path of the listing's ``detail.json``."""
        return self.path_listing() / 'detail.json'

    def path_floorplan_model_json(self) -> pathlib.Path:
        """Return the path where ``calculate_sqm_model`` stores its results."""
        return self.path_listing() / 'floorplan_model.json'

    def path_floorplan_ocr_json(self) -> pathlib.Path:
        """Return the path where ``calculate_sqm_ocr`` stores its results."""
        return self.path_listing() / 'floorplan_ocr.json'

    def path_pic_folder(self) -> pathlib.Path:
        """Return the folder holding the listing's pictures."""
        return self.path_listing() / 'pics'

    def path_pic_file(self, order, name) -> pathlib.Path:
        """Return the path ``<pics folder>/<order>_<name>`` of one picture.

        This method was originally a second ``path_pic_folder`` definition
        that shadowed the no-argument one and then called itself without
        arguments, so it could never succeed; it now has its own name.
        """
        return self.path_pic_folder() / f'{order}_{name}'

    def path_floorplan_folder(self) -> pathlib.Path:
        """Return the folder holding the listing's floorplan images."""
        return self.path_listing() / 'floorplans'

    def path_floorplan_file(self, order, name) -> pathlib.Path:
        """Return the path ``<floorplans folder>/<order>_<name>``."""
        # Floorplan files live in the floorplan folder, not the pics folder.
        return self.path_floorplan_folder() / f'{order}_{name}'

    def list_floorplans(self):
        """Return every entry found directly inside the floorplan folder."""
        # TODO: check that each returned entry is actually an image.
        return list(self.path_floorplan_folder().glob('*'))

    def calculate_sqm_model(self):
        """Run ``floorplan.calculate_model`` on every floorplan and save it.

        One record per floorplan is written as a JSON list to
        ``path_floorplan_model_json()``.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output, predictions = \
                floorplan.calculate_model(floorplan_path)
            objs.append({
                'floorplan_path': str(floorplan_path),
                'estimated_sqm': estimated_sqm,
                'model_output': model_output,
                # The predictions themselves are a tensor and cannot be
                # JSON-serialised, so only their count is stored.
                'no_predictions': len(predictions),
            })
        with open(self.path_floorplan_model_json(), 'w', encoding='utf-8') as f:
            json.dump(objs, f)

    @staticmethod
    def _max_estimated_sqm(json_path):
        """Return the largest non-``None`` ``'estimated_sqm'`` in *json_path*.

        Returns ``None`` when the file holds no usable estimate.
        """
        with open(json_path, encoding='utf-8') as f:
            objs = json.load(f)
        estimates = [
            o['estimated_sqm'] for o in objs
            if o['estimated_sqm'] is not None  # filter out Nones
        ]
        return max(estimates, default=None)

    @property
    def sqm_model(self, recalculate=True):
        """Largest model-based sqm estimate over all floorplans, or ``None``.

        The results file is generated first when it does not exist yet.
        ``recalculate`` is kept for signature compatibility; plain property
        access always uses its default.
        """
        json_path = self.path_floorplan_model_json()
        if recalculate and not json_path.exists():
            self.calculate_sqm_model()
        return self._max_estimated_sqm(json_path)

    def calculate_sqm_ocr(self):
        """Run ``floorplan.calculate_ocr`` on every floorplan and save it.

        One record per floorplan is written as a JSON list to
        ``path_floorplan_ocr_json()``.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
            objs.append({
                'floorplan_path': str(floorplan_path),
                'estimated_sqm': estimated_sqm,
                'text': model_output,
            })
        with open(self.path_floorplan_ocr_json(), 'w', encoding='utf-8') as f:
            json.dump(objs, f)

    @property
    def sqm_ocr(self, recalculate=True):
        """Largest OCR-based sqm estimate over all floorplans, or ``None``.

        The results file is generated first when it does not exist yet.
        ``recalculate`` is kept for signature compatibility; plain property
        access always uses its default.
        """
        json_path = self.path_floorplan_ocr_json()
        if recalculate and not json_path.exists():
            self.calculate_sqm_ocr()
        return self._max_estimated_sqm(json_path)


if __name__ == '__main__':
    listings = Listing.get_all_listings()
    # Avoid an IndexError when the data directory holds no listings.
    if listings:
        print(listings[0].list_floorplans())