31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
|
|
import re
|
||
|
|
from PIL import Image
|
||
|
|
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
|
||
|
|
|
||
|
|
# Cache of (processor, model) pairs keyed by checkpoint name, so the
# pretrained weights are loaded once per process instead of on every call.
_DEPLOT_CACHE = {}


def _load_deplot(checkpoint='google/deplot'):
    """Return the (processor, model) pair for *checkpoint*, loading it once."""
    if checkpoint not in _DEPLOT_CACHE:
        _DEPLOT_CACHE[checkpoint] = (
            Pix2StructProcessor.from_pretrained(checkpoint),
            Pix2StructForConditionalGeneration.from_pretrained(checkpoint),
        )
    return _DEPLOT_CACHE[checkpoint]


def inference(image_path):
    """Run the 'google/deplot' Pix2Struct model on the image at *image_path*.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A ``(output, predictions)`` tuple: ``output`` is the first generated
        sequence decoded to text with special tokens skipped, and
        ``predictions`` is the raw value returned by ``model.generate``.
    """
    question = "How many living rooms are displayed on this floor plan?"  # not sure if it even has an effect

    # Open the image in a context manager so the underlying file handle is
    # released as soon as the processor has turned it into model inputs.
    # Opening first also makes a bad path fail before the expensive load.
    with Image.open(image_path) as image:
        processor, model = _load_deplot()
        inputs = processor(images=image, text=question, return_tensors="pt")

    predictions = model.generate(**inputs, max_new_tokens=512)
    output = processor.decode(predictions[0], skip_special_tokens=True)

    return output, predictions
|
||
|
|
|
||
|
|
|
||
|
|
# A number (fractional part optional, so "85 sqm" matches as well as
# "85.5 sqm"), an optional single space, then an area/length unit.
# NOTE: the "." in "sq.m" is an unescaped regex wildcard; it is left
# permissive so every string that matched before still matches.
_SQM_PATTERN = re.compile(r'(\d+(?:\.\d*)?) ?(?:sqm|sq.m|sq m|m)')


def extract_total_sqm(deplot_input_str):
    """Return the largest area-like number found in *deplot_input_str*.

    The text is lower-cased and scanned for numbers followed by one of the
    units ``sqm``, ``sq.m``, ``sq m`` or ``m``. The largest such number is
    returned, on the assumption that the biggest figure is the total area.

    Args:
        deplot_input_str: Text to scan, e.g. the decoded model output.

    Returns:
        The maximum matched value as a ``float``, or ``None`` when the input
        is empty/``None`` or contains no matching number.
    """
    if not deplot_input_str:
        return None

    values = [
        float(number)
        for number in _SQM_PATTERN.findall(deplot_input_str.lower())
    ]
    if not values:
        return None
    return max(values)
|
||
|
|
|
||
|
|
|
||
|
|
def calculate(image_path):
    """Estimate the total area shown on the floor-plan image at *image_path*.

    Runs ``inference`` on the image and extracts the largest area figure
    from the decoded text with ``extract_total_sqm``.

    Args:
        image_path: Path of the image, forwarded unchanged to ``inference``.

    Returns:
        A ``(estimated_sqm, output, predictions_tensor)`` tuple, where
        ``estimated_sqm`` is whatever ``extract_total_sqm`` returns for the
        decoded text, and the other two items are the values returned by
        ``inference``.
    """
    output, predictions_tensor = inference(image_path)
    # The decoded text must be passed in: calling extract_total_sqm() with
    # no argument raised TypeError on every invocation.
    estimated_sqm = extract_total_sqm(output)
    return estimated_sqm, output, predictions_tensor
|