wrongmove/crawler/rec/floorplan.py

63 lines
2.2 KiB
Python

import re
from pathlib import Path
from typing import Any
from PIL import Image
import cv2
import numpy as np
def inference(image_path: str | Path) -> tuple[str, Any]:
    """Run the ``google/deplot`` Pix2Struct model on an image and decode its output.

    Args:
        image_path: Path of the image file to open with Pillow.

    Returns:
        A ``(text, predictions)`` tuple: ``text`` is the first generated
        sequence decoded with special tokens skipped, ``predictions`` is the
        raw value returned by ``model.generate``.
    """
    # Function-scope import: transformers is only needed when inference runs.
    from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration

    # Original author's note: not sure this prompt even has an effect.
    question = "How many living rooms are displayed on this floor plan?"

    # NOTE(review): processor and model are re-loaded on every call; callers
    # that process many images may want to cache them.
    processor = Pix2StructProcessor.from_pretrained("google/deplot")
    model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")

    # Use a context manager so the underlying file handle is released once the
    # processor has consumed the pixel data.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=question, return_tensors="pt")

    predictions = model.generate(**inputs, max_new_tokens=512)
    output = processor.decode(predictions[0], skip_special_tokens=True)
    return output, predictions
def extract_total_sqm(input_str: str) -> float | None:
sqmregex = r"(\d+\.?\d*) ?(sq ?m|sq. ?m)"
matches = re.findall(sqmregex, input_str.lower())
sqms = [float(m[0]) for m in matches]
filtered = [sqm for sqm in sqms if 30 < sqm < 160]
if len(filtered) == 0:
return None
return max(filtered)
def calculate_model(image_path: str | Path) -> tuple[float | None, str, Any]:
    """Estimate the floor area of an image using the model-based pipeline.

    Calls ``inference`` on *image_path* and passes the decoded text to
    ``extract_total_sqm``.

    Returns:
        ``(estimated_sqm, model_text, raw_predictions)`` where
        ``estimated_sqm`` is ``None`` if no figure could be extracted.
    """
    model_text, raw_predictions = inference(image_path)
    return extract_total_sqm(model_text), model_text, raw_predictions
def improve_img_for_ocr(img: Image.Image) -> Image.Image:
    """Preprocess an image for a second OCR attempt.

    The image is converted to 8-bit grayscale, upscaled by a factor of 1.2
    with cubic interpolation, and binarised with a Gaussian adaptive
    threshold (block size 11, constant 2).

    Args:
        img: Source Pillow image.

    Returns:
        A new Pillow image holding the thresholded result.
    """
    gray = np.array(img.convert("L"))
    # cv2.resize returns a new array; the result must be kept, otherwise the
    # upscale is silently discarded and thresholding runs on the original size.
    upscaled = cv2.resize(gray, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
    thresh = cv2.adaptiveThreshold(
        upscaled, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    return Image.fromarray(thresh)
def calculate_ocr(image_path: str | Path) -> tuple[float | None, str]:
    """Estimate the floor area of an image via Tesseract OCR.

    OCR is run on the image as-is first. If no square-metre figure is found,
    the image is preprocessed with ``improve_img_for_ocr`` and OCR is retried;
    each retry is appended as one line to ``recalculating.log`` in the current
    working directory.

    Args:
        image_path: Path of the image file to open with Pillow.

    Returns:
        ``(estimated_sqm, text)`` from the attempt that was used;
        ``estimated_sqm`` is ``None`` if both attempts fail to find a figure.
    """
    # Function-scope import: pytesseract is only needed when OCR runs.
    import pytesseract

    # Context manager releases the image's file handle on every exit path.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)
        estimated_sqm = extract_total_sqm(text)
        if estimated_sqm is not None:
            return estimated_sqm, text

        # First pass found nothing: retry on the preprocessed image.
        improved_img = improve_img_for_ocr(img)
        text2 = pytesseract.image_to_string(improved_img)

    estimated_sqm2 = extract_total_sqm(text2)
    # Explicit encoding so non-ASCII paths log identically on every platform.
    with open("recalculating.log", "a", encoding="utf-8") as f:
        f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n")
    return estimated_sqm2, text2