adding tesseract OCR for floorplan detection

This commit is contained in:
Kadir 2024-03-10 22:32:34 +00:00
parent 508aa02812
commit d108bf11ee
8 changed files with 153 additions and 29 deletions

View file

@ -30,8 +30,11 @@ class Listing():
def path_detail_json(self) -> pathlib.Path:
    """Return ``detail.json`` joined onto the path given by ``path_listing()``."""
    base = self.path_listing()
    return base / 'detail.json'
def path_floorplan_json(self) -> pathlib.Path:
    """Return ``floorplan.json`` joined onto the path given by ``path_listing()``."""
    base = self.path_listing()
    return base / 'floorplan.json'
def path_floorplan_model_json(self) -> pathlib.Path:
    """Return ``floorplan_model.json`` joined onto the path given by ``path_listing()``."""
    base = self.path_listing()
    return base / 'floorplan_model.json'
def path_floorplan_ocr_json(self) -> pathlib.Path:
    """Return ``floorplan_ocr.json`` joined onto the path given by ``path_listing()``."""
    base = self.path_listing()
    return base / 'floorplan_ocr.json'
def path_pic_folder(self) -> pathlib.Path:
    """Return ``pics`` joined onto the path given by ``path_listing()``."""
    base = self.path_listing()
    return base / 'pics'
@ -51,36 +54,58 @@ class Listing():
# todo add check if return is image
return images
def calculate_sqm_model(self):
    """Run the floorplan model on every floorplan and persist the results.

    Calls ``floorplan.calculate_model`` for each path returned by
    ``list_floorplans()``, writes one JSON entry per floorplan to
    ``path_floorplan_model_json()``, and returns the largest estimate.

    Returns:
        The largest non-``None`` ``estimated_sqm`` among the entries, or
        ``None`` when there are no floorplans or none produced an estimate.
    """
    objs = []
    for floorplan_path in self.list_floorplans():
        estimated_sqm, model_output, predictions = floorplan.calculate_model(floorplan_path)
        objs.append({
            # Stored as a string so the entry is JSON-serializable.
            'floorplan_path': str(floorplan_path),
            'estimated_sqm': estimated_sqm,
            'model_output': model_output,
            'no_predictions': len(predictions),  # cant serialize the predictions itself since its a tensor
        })

    with open(self.path_floorplan_model_json(), 'w') as f:
        json.dump(objs, f)

    # Keep only real estimates. The previous filter (`if o is None`) selected
    # nothing for dict entries, so max() always received an empty list.
    estimates = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
    return max(estimates, default=None)


# SOURCE shows both `calculate_sqm` and `calculate_sqm_model` as headers for
# this body; the older spelling is kept as an alias for existing callers.
calculate_sqm = calculate_sqm_model
@property
def sqm_model(self, recalculate=True):
    """Return the largest model-estimated ``estimated_sqm`` for this listing.

    If the JSON file at ``path_floorplan_model_json()`` does not exist yet,
    ``calculate_sqm_model()`` is run first to create it. The file is then
    read back and the maximum ``estimated_sqm`` over its entries is returned.

    Because this is a property, callers cannot supply ``recalculate``; it
    always takes its default of ``True``.

    Returns:
        The largest non-``None`` ``estimated_sqm`` value, or ``None`` when the
        file holds no entry with an estimate.
    """
    model_json = self.path_floorplan_model_json()
    if recalculate and not model_json.exists():
        self.calculate_sqm_model()

    # Read the same file that was checked (and possibly just written) above;
    # previously this opened path_floorplan_json() instead.
    with open(model_json) as f:
        objs = json.load(f)

    # Keep only real estimates. The previous filter (`if o is None`) selected
    # nothing for dict entries, so max() always received an empty list.
    estimates = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
    return max(estimates, default=None)


# SOURCE shows both `sqm` and `sqm_model` as headers for this property; the
# older spelling is kept as an alias for existing callers.
sqm = sqm_model
def calculate_sqm_ocr(self):
    """Run ``floorplan.calculate_ocr`` on every floorplan and save the results.

    One entry is built per path from ``list_floorplans()``: the stringified
    path, the first value returned by ``floorplan.calculate_ocr`` under
    ``estimated_sqm`` and the second under ``text``. The list of entries is
    written as JSON to ``path_floorplan_ocr_json()``. Nothing is returned.
    """
    def _entry_for(plan_path):
        # Build the JSON-serializable record for a single floorplan.
        sqm_estimate, ocr_output = floorplan.calculate_ocr(plan_path)
        return {
            'floorplan_path': str(plan_path),
            'estimated_sqm': sqm_estimate,
            'text': ocr_output,
        }

    records = [_entry_for(plan_path) for plan_path in self.list_floorplans()]

    target = self.path_floorplan_ocr_json()
    with open(target, 'w') as handle:
        json.dump(records, handle)
@property
def sqm_ocr(self, recalculate=True):
    """Return the largest OCR-derived ``estimated_sqm`` for this listing.

    If the JSON file at ``path_floorplan_ocr_json()`` does not exist yet,
    ``calculate_sqm_ocr()`` is run first to create it. The file is then read
    back and the maximum ``estimated_sqm`` over its entries is returned.

    Because this is a property, callers cannot supply ``recalculate``; it
    always takes its default of ``True``.

    Returns:
        The largest non-``None`` ``estimated_sqm`` value, or ``None`` when the
        file holds no entry with an estimate.
    """
    ocr_json = self.path_floorplan_ocr_json()
    if recalculate and not ocr_json.exists():
        self.calculate_sqm_ocr()

    with open(ocr_json) as f:
        objs = json.load(f)

    # Keep only real estimates. The previous filter (`if o is None`) selected
    # nothing for dict entries, so max() always received an empty list.
    estimates = [o['estimated_sqm'] for o in objs if o['estimated_sqm'] is not None]
    return max(estimates, default=None)
if __name__ == '__main__':
    # Manual smoke check: print the floorplans of the first listing, if any.
    listings = Listing.get_all_listings()
    if listings:
        print(listings[0].list_floorplans())
    else:
        # Avoid an IndexError on listings[0] when nothing has been collected.
        print('no listings found')