fixing floorplan detection and adding recalculation method

This commit is contained in:
Kadir 2024-03-18 00:56:39 +00:00
parent 335adc0856
commit 4dea766a12
4 changed files with 21 additions and 4 deletions

View file

@ -3,13 +3,13 @@ import pathlib
import json
from data_access import Listing
d = listing_query(1, 3, 3, 15, 0, 800000)
d = listing_query(1, 3, 3, 15, 0, 800000, max_days_since_added=7)
folder = pathlib.Path("data/rs/")
for i in range(1, 10000):
try:
print(f"page {i}")
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=3)
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
except:
break

View file

@ -0,0 +1,15 @@
# Recalculate the estimated sqm (via the regex) from previously OCR'ed floorplan text
import json
from rec.floorplan import extract_total_sqm
from tqdm import tqdm
from data_access import Listing
# Refresh the 'estimated_sqm' field of every stored floorplan entry by running
# extract_total_sqm over the OCR text that is already saved on disk, then
# write the updated entries back to the same JSON file.
# NOTE(review): the listings are materialized with list() before being handed
# to tqdm -- presumably so the progress bar knows the total; confirm.
for listing in tqdm(list(Listing.get_all_listings())):
    # Resolve the path once; it is used for both the read and the write.
    ocr_path = listing.path_floorplan_ocr_json()

    with open(ocr_path) as f:
        floorplans = json.load(f)

    # Each entry is expected to carry its OCR output under the 'text' key.
    for floorplan in floorplans:
        floorplan['estimated_sqm'] = extract_total_sqm(floorplan['text'])

    # json.dump returns None, so its result is deliberately not bound to
    # anything (the original rebound `floorplans` to that None).
    with open(ocr_path, 'w') as f:
        json.dump(floorplans, f)

View file

@ -154,7 +154,9 @@ class Listing():
@property
def price_per_sqm(self) -> float:
if self.sqm_ocr is None:
if self.sqm_ocr == 0:
print(self.identifier)
if self.sqm_ocr is None or self.sqm_ocr == 0:
return None
return self.price / self.sqm_ocr

View file

@ -17,7 +17,7 @@ def inference(image_path):
def extract_total_sqm(deplot_input_str):
sqmregex = r'(\d+\.\d*) ?(sqm|sq.m|sq m|m)'
sqmregex = r'(\d+\.\d*) ?(sq ?m|sq. ?m)'
matches = re.findall(sqmregex, deplot_input_str.lower())
if len(matches) == 0:
return None