Real crawling scripts and floorplan detection

1. get all listings
2. get all detail jsons
3. get all images
4. get all floorplans
5. detect floorplans

Also updates dependencies (Hugging Face, etc.).
This commit is contained in:
Kadir 2024-03-10 18:49:39 +00:00
parent 46bb641026
commit 508aa02812
12 changed files with 1531 additions and 170 deletions

30
crawler/rec/floorplan.py Normal file
View file

@ -0,0 +1,30 @@
import re
from PIL import Image
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
# Checkpoint used for both the processor and the model.
_DEPLOT_CHECKPOINT = 'google/deplot'

# Lazily populated (processor, model) pair; see _load_deplot().
_DEPLOT = None


def _load_deplot():
    """Return the DePlot ``(processor, model)`` pair, loading it on first use.

    ``from_pretrained`` is expensive, so the pair is kept at module level and
    reused by later calls instead of being rebuilt for every image.
    """
    global _DEPLOT
    if _DEPLOT is None:
        processor = Pix2StructProcessor.from_pretrained(_DEPLOT_CHECKPOINT)
        model = Pix2StructForConditionalGeneration.from_pretrained(_DEPLOT_CHECKPOINT)
        # Assign only once both loads succeeded so a failed load is retried
        # on the next call rather than leaving a half-initialised cache.
        _DEPLOT = (processor, model)
    return _DEPLOT


def inference(image_path):
    """Run the DePlot model on a floor plan image and decode its output.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A tuple ``(output, predictions)`` where ``output`` is the decoded
        text of the first generated sequence (special tokens skipped) and
        ``predictions`` is the raw value returned by ``model.generate``.
    """
    question = "How many living rooms are displayed on this floor plan?"  # not sure if it even has an effect
    processor, model = _load_deplot()

    # The context manager closes the underlying file deterministically, even
    # when preprocessing raises; the image is only needed to build the inputs.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=question, return_tensors="pt")

    predictions = model.generate(**inputs, max_new_tokens=512)
    output = processor.decode(predictions[0], skip_special_tokens=True)
    return output, predictions
# A number (integer or decimal) followed by an optional space and an area
# unit. The '.' in 'sq.m' is intentionally left unescaped so that any single
# separator character between 'sq' and 'm' keeps matching as before.
_SQM_PATTERN = re.compile(r'(\d+(?:\.\d*)?) ?(?:sqm|sq.m|sq m|m)')


def extract_total_sqm(deplot_input_str):
    """Return the largest area figure mentioned in *deplot_input_str*.

    The text is lower-cased and scanned for numbers directly followed by one
    of the units ``sqm``, ``sq.m``, ``sq m`` or ``m``. Both integer ("85 sqm")
    and decimal ("85.3 sqm") values are recognised. The largest value is
    taken as the total.

    Args:
        deplot_input_str: Text to scan, e.g. the decoded model output.

    Returns:
        The largest matched number as a ``float``, or ``None`` when the text
        contains no number followed by a recognised unit.
    """
    values = _SQM_PATTERN.findall(deplot_input_str.lower())
    return max((float(value) for value in values), default=None)
def calculate(image_path):
    """Estimate the total area shown on a floor plan image.

    Runs ``inference`` on the image and extracts the largest area figure
    from the decoded text with ``extract_total_sqm``.

    Args:
        image_path: Path of the floor plan image, forwarded to ``inference``.

    Returns:
        A tuple ``(estimated_sqm, output, predictions_tensor)``:
        ``estimated_sqm`` is the value returned by ``extract_total_sqm``
        (``None`` when no area is found), ``output`` is the decoded model
        text and ``predictions_tensor`` is the raw generation result.
    """
    output, predictions_tensor = inference(image_path)
    # The decoded text must be passed in; calling extract_total_sqm() without
    # an argument raised TypeError on every invocation.
    estimated_sqm = extract_total_sqm(output)
    return estimated_sqm, output, predictions_tensor