From 4dfbcc64c1c23fb43a0918afe6574a2694acbe02 Mon Sep 17 00:00:00 2001
From: Kadir
Date: Sun, 25 Aug 2024 12:11:55 +0200
Subject: [PATCH] floorplan rec: fix regex rule + filter out extreme values

---
 crawler/rec/floorplan.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/crawler/rec/floorplan.py b/crawler/rec/floorplan.py
index 562ab08..98c588a 100644
--- a/crawler/rec/floorplan.py
+++ b/crawler/rec/floorplan.py
@@ -18,12 +18,13 @@ def inference(image_path):
 
 
 def extract_total_sqm(input_str: str):
-    sqmregex = r"(\d+\.\d*) ?(sq ?m|sq. ?m)"
+    sqmregex = r"(\d+\.?\d*) ?(sq ?m|sq. ?m)"
     matches = re.findall(sqmregex, input_str.lower())
-    if len(matches) == 0:
-        return None
     sqms = [float(m[0]) for m in matches]
-    return max(sqms)
+    filtered = [sqm for sqm in sqms if 30 < sqm < 160]
+    if len(filtered) == 0:
+        return None
+    return max(filtered)
 
 
 def calculate_model(image_path):