Real crawling scripts and floorplan detection
Pipeline steps: (1) get all listings, (2) get all detail JSONs, (3) get all images, (4) get all floorplans, (5) detect floorplans. Also updates dependencies (Hugging Face etc.).
This commit is contained in:
parent
46bb641026
commit
508aa02812
12 changed files with 1531 additions and 170 deletions
30
crawler/rec/floorplan.py
Normal file
30
crawler/rec/floorplan.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import re
|
||||
from PIL import Image
|
||||
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
|
||||
|
||||
def inference(image_path):
|
||||
image = Image.open(image_path)
|
||||
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
|
||||
processor = Pix2StructProcessor.from_pretrained('google/deplot')
|
||||
model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot')
|
||||
|
||||
inputs = processor(images=image, text=question, return_tensors="pt")
|
||||
predictions = model.generate(**inputs, max_new_tokens=512)
|
||||
output = processor.decode(predictions[0], skip_special_tokens=True)
|
||||
|
||||
return output, predictions
|
||||
|
||||
|
||||
def extract_total_sqm(deplot_input_str):
|
||||
sqmregex = r'(\d+\.\d*) ?(sqm|sq.m|sq m|m)'
|
||||
matches = re.findall(sqmregex, deplot_input_str.lower())
|
||||
if len(matches) == 0:
|
||||
return None
|
||||
sqms = [float(m[0]) for m in matches]
|
||||
return max(sqms)
|
||||
|
||||
|
||||
def calculate(image_path):
|
||||
output, predictions_tensor = inference(image_path)
|
||||
estimated_sqm = extract_total_sqm()
|
||||
return estimated_sqm, output, predictions_tensor
|
||||
Loading…
Add table
Add a link
Reference in a new issue