fixing floorplan detection and adding recalculation method
This commit is contained in:
parent
335adc0856
commit
4dea766a12
4 changed files with 21 additions and 4 deletions
|
|
@ -3,13 +3,13 @@ import pathlib
|
|||
import json
|
||||
from data_access import Listing
|
||||
|
||||
d = listing_query(1, 3, 3, 15, 0, 800000)
|
||||
d = listing_query(1, 3, 3, 15, 0, 800000, max_days_since_added=7)
|
||||
folder = pathlib.Path("data/rs/")
|
||||
|
||||
for i in range(1, 10000):
|
||||
try:
|
||||
print(f"page {i}")
|
||||
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=3)
|
||||
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
|
||||
except:
|
||||
break
|
||||
|
||||
|
|
|
|||
15
crawler/9_recalculate_regex_squaremeter.py
Normal file
15
crawler/9_recalculate_regex_squaremeter.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Recalculate the estimated sqm by re-running the regex on previously OCR'ed floorplan text
|
||||
import json
|
||||
from rec.floorplan import extract_total_sqm
|
||||
from tqdm import tqdm
|
||||
from data_access import Listing
|
||||
|
||||
# Re-run the square-metre extraction over each listing's stored floorplan OCR
# results and write the refreshed estimates back to the same JSON file.
for listing in tqdm(list(Listing.get_all_listings())):
    # Resolve the path once; the same file is read and then overwritten.
    ocr_path = listing.path_floorplan_ocr_json()

    with open(ocr_path) as f:
        floorplans = json.load(f)

    # Only the derived estimate is refreshed; the stored 'text' is reused as-is.
    for floorplan in floorplans:
        floorplan['estimated_sqm'] = extract_total_sqm(floorplan['text'])

    with open(ocr_path, 'w') as f:
        # json.dump returns None, so its result is deliberately not bound
        # (the original rebound `floorplans` to None here).
        json.dump(floorplans, f)
|
||||
|
|
@ -154,7 +154,9 @@ class Listing():
|
|||
|
||||
@property
|
||||
def price_per_sqm(self) -> float:
|
||||
if self.sqm_ocr is None:
|
||||
if self.sqm_ocr == 0:
|
||||
print(self.identifier)
|
||||
if self.sqm_ocr is None or self.sqm_ocr == 0:
|
||||
return None
|
||||
return self.price / self.sqm_ocr
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ def inference(image_path):
|
|||
|
||||
|
||||
def extract_total_sqm(deplot_input_str):
|
||||
sqmregex = r'(\d+\.\d*) ?(sqm|sq.m|sq m|m)'
|
||||
sqmregex = r'(\d+\.\d*) ?(sq ?m|sq. ?m)'
|
||||
matches = re.findall(sqmregex, deplot_input_str.lower())
|
||||
if len(matches) == 0:
|
||||
return None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue