wrongmove/crawler/rec/floorplan.py

40 lines
1.3 KiB
Python
Raw Normal View History

import re
from PIL import Image
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
import pytesseract
2024-03-25 20:48:48 +00:00
_DEPLOT_CHECKPOINT = "google/deplot"
# Holds the loaded (processor, model) pair so the checkpoint is only
# read once per process instead of on every call.
_deplot_cache = {}


def _load_deplot():
    """Return the DePlot (processor, model) pair, loading it on first use."""
    if "pair" not in _deplot_cache:
        processor = Pix2StructProcessor.from_pretrained(_DEPLOT_CHECKPOINT)
        model = Pix2StructForConditionalGeneration.from_pretrained(
            _DEPLOT_CHECKPOINT
        )
        _deplot_cache["pair"] = (processor, model)
    return _deplot_cache["pair"]


def inference(image_path):
    """Run the ``google/deplot`` Pix2Struct model on a floor plan image.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A ``(output, predictions)`` tuple: ``output`` is the decoded text of
        the first generated sequence (special tokens skipped) and
        ``predictions`` is the raw value returned by ``model.generate``.
    """
    # not sure if it even has an effect
    question = "How many living rooms are displayed on this floor plan?"

    # Open the image first so a bad path fails before any model loading, and
    # use a context manager so the underlying file handle is released.
    with Image.open(image_path) as image:
        processor, model = _load_deplot()
        inputs = processor(images=image, text=question, return_tensors="pt")

    predictions = model.generate(**inputs, max_new_tokens=512)
    output = processor.decode(predictions[0], skip_special_tokens=True)
    return output, predictions
2024-03-25 20:48:48 +00:00
2024-08-25 09:50:41 +02:00
# A number (optionally with "," thousands separators and/or a decimal part)
# followed by a square-metre unit: "sqm", "sq m", "sq.m", "sq. m".
# The decimal part is optional so whole-number areas such as "85 sq m" match,
# and the dot after "sq" is escaped so it no longer matches any character.
_SQM_PATTERN = re.compile(
    r"(\d{1,3}(?:,\d{3})+(?:\.\d*)?|\d+(?:\.\d*)?) ?sq\.?[ \t]*m"
)


def extract_total_sqm(input_str: str):
    """Return the largest square-metre figure mentioned in *input_str*.

    Matching is case-insensitive. The largest value is returned on the
    assumption that the biggest area quoted is the total.

    Args:
        input_str: Free text, e.g. model output or OCR text of a floor plan.

    Returns:
        The largest matched area as a ``float``, or ``None`` when the text
        contains no square-metre figure.
    """
    matches = _SQM_PATTERN.findall(input_str.lower())
    if not matches:
        return None
    # Strip thousands separators before converting ("1,250.5" -> 1250.5).
    return max(float(number.replace(",", "")) for number in matches)
def calculate_model(image_path):
    """Estimate the floor area of a floor plan image using the model.

    Runs ``inference`` on *image_path* and parses the decoded text with
    ``extract_total_sqm``.

    Returns:
        A ``(estimated_sqm, output, predictions_tensor)`` tuple, where
        ``estimated_sqm`` is whatever ``extract_total_sqm`` returns for the
        decoded text and the other two items are passed through unchanged
        from ``inference``.
    """
    model_result = inference(image_path)
    decoded_text, raw_predictions = model_result
    return extract_total_sqm(decoded_text), decoded_text, raw_predictions
def calculate_ocr(image_path):
    """Estimate the floor area of a floor plan image using Tesseract OCR.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        An ``(estimated_sqm, text)`` tuple: ``text`` is the string returned
        by ``pytesseract.image_to_string`` and ``estimated_sqm`` is the
        result of ``extract_total_sqm`` on that text.
    """
    # Context manager so the image's file handle is closed once OCR is done.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)
    estimated_sqm = extract_total_sqm(text)
    return estimated_sqm, text