2024-03-10 18:49:39 +00:00
|
|
|
import re
|
|
|
|
|
from PIL import Image
|
2025-03-30 23:41:52 +01:00
|
|
|
import cv2
|
|
|
|
|
import numpy as np
|
2024-03-10 18:49:39 +00:00
|
|
|
|
2024-03-25 20:48:48 +00:00
|
|
|
|
2024-03-10 18:49:39 +00:00
|
|
|
def inference(image_path):
    """Run the ``google/deplot`` Pix2Struct model on a floor-plan image.

    Args:
        image_path: Path (or file-like object) accepted by ``PIL.Image.open``.

    Returns:
        A ``(output, predictions)`` tuple: ``output`` is the decoded text of
        the first generated sequence (special tokens stripped) and
        ``predictions`` is the raw result of ``model.generate``.
    """
    # Imported lazily so that merely importing this module does not pull in
    # transformers.
    from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration

    question = "How many living rooms are displayed on this floor plan?"  # not sure if it even has an effect
    processor = Pix2StructProcessor.from_pretrained("google/deplot")
    model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")

    # Image.open keeps the underlying file open until the image is closed;
    # the context manager releases it as soon as the pixels have been
    # turned into model inputs.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=question, return_tensors="pt")

    predictions = model.generate(**inputs, max_new_tokens=512)
    output = processor.decode(predictions[0], skip_special_tokens=True)
    return output, predictions
|
2024-03-25 20:48:48 +00:00
|
|
|
|
2024-03-10 18:49:39 +00:00
|
|
|
|
2024-08-25 09:50:41 +02:00
|
|
|
def extract_total_sqm(input_str: str, min_sqm: float = 30, max_sqm: float = 160):
    """Return the largest plausible square-metre figure mentioned in a text.

    The text is lower-cased and searched for numbers followed by a
    square-metre unit written as ``sqm``, ``sq m``, ``sq.m`` or ``sq. m``.
    Only values strictly between ``min_sqm`` and ``max_sqm`` are considered,
    which discards individual room sizes and unrelated numbers.

    Args:
        input_str: Text to scan (e.g. OCR or model output).
        min_sqm: Exclusive lower bound for an accepted value.
        max_sqm: Exclusive upper bound for an accepted value.

    Returns:
        The largest accepted value as a ``float``, or ``None`` when no value
        in range is found.
    """
    # The dot after "sq" is escaped and optional: it must match a literal
    # "." only, not an arbitrary character.
    sqm_pattern = r"(\d+\.?\d*) ?sq\.? ?m"
    values = [float(number) for number in re.findall(sqm_pattern, input_str.lower())]
    plausible = [value for value in values if min_sqm < value < max_sqm]
    if not plausible:
        return None
    return max(plausible)
|
2024-03-10 18:49:39 +00:00
|
|
|
|
|
|
|
|
|
2024-03-10 22:32:34 +00:00
|
|
|
def calculate_model(image_path):
    """Estimate the floor area of an image using the model pipeline.

    Runs ``inference`` on ``image_path`` and parses the decoded text with
    ``extract_total_sqm``.

    Returns:
        A ``(estimated_sqm, output, predictions_tensor)`` tuple, where
        ``estimated_sqm`` is whatever ``extract_total_sqm`` returns for the
        decoded text, and the other two items are the values returned by
        ``inference``.
    """
    model_text, raw_predictions = inference(image_path)
    return extract_total_sqm(model_text), model_text, raw_predictions
|
2024-03-10 22:32:34 +00:00
|
|
|
|
2025-05-07 21:25:40 +00:00
|
|
|
|
2025-03-30 23:41:52 +01:00
|
|
|
def improve_img_for_ocr(img: Image.Image) -> Image.Image:
    """Preprocess an image for a second OCR attempt.

    The image is converted to 8-bit grayscale, upscaled by a factor of 1.2
    with bicubic interpolation and binarised with a Gaussian adaptive
    threshold (block size 11, constant 2).

    Args:
        img: The PIL image to preprocess.

    Returns:
        A new PIL image holding the thresholded result.
    """
    gray = np.array(img.convert('L'))
    # cv2.resize returns the scaled array and leaves its input untouched, so
    # the result has to be kept for the upscale to take effect.
    upscaled = cv2.resize(gray, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
    thresh = cv2.adaptiveThreshold(
        upscaled,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return Image.fromarray(thresh)
|
2024-03-10 22:32:34 +00:00
|
|
|
|
2025-05-07 21:25:40 +00:00
|
|
|
|
2024-03-10 22:32:34 +00:00
|
|
|
def calculate_ocr(image_path):
    """Estimate the floor area of an image using Tesseract OCR.

    OCR is first run on the image as-is. If no square-metre value can be
    extracted from that text, the image is preprocessed with
    ``improve_img_for_ocr`` and OCR is run again; the before/after result of
    that retry is appended to ``recalculating.log``.

    Args:
        image_path: Path (or file-like object) accepted by ``PIL.Image.open``.

    Returns:
        An ``(estimated_sqm, text)`` tuple taken from the first OCR pass when
        it yields a value, otherwise from the retry on the preprocessed image
        (whose ``estimated_sqm`` may still be ``None``).
    """
    # Imported lazily so that merely importing this module does not require
    # pytesseract.
    import pytesseract

    # The context manager closes the image file once it is no longer needed;
    # the preprocessed image is a separate in-memory copy.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)
        estimated_sqm = extract_total_sqm(text)
        if estimated_sqm is not None:
            return estimated_sqm, text
        improved_img = improve_img_for_ocr(img)

    text2 = pytesseract.image_to_string(improved_img)
    estimated_sqm2 = extract_total_sqm(text2)
    # Explicit encoding so paths with non-ASCII characters can be logged
    # regardless of the platform default.
    with open("recalculating.log", "a", encoding="utf-8") as f:
        f.write(
            f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n"
        )
    return estimated_sqm2, text2
|