Defer the transformers and pytesseract imports until they are actually used. This noticeably shortens the startup time of every other command.

This commit is contained in:
Viktor Barzin 2025-05-21 21:24:57 +00:00
parent 10ae25e0d3
commit 1e0f302178
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863

View file

@ -2,11 +2,10 @@ import re
from PIL import Image
import cv2
import numpy as np
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
import pytesseract
def inference(image_path):
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
image = Image.open(image_path)
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
processor = Pix2StructProcessor.from_pretrained("google/deplot")
@ -38,13 +37,13 @@ def calculate_model(image_path):
def improve_img_for_ocr(img: Image.Image) -> Image.Image:
    """Return a preprocessed copy of *img* intended to be easier to OCR.

    The image is converted to single-channel grayscale, upscaled by a factor
    of 1.2 using bicubic interpolation, and then binarized with a Gaussian
    adaptive threshold (block size 11, constant 2).

    Args:
        img: The PIL image to preprocess. It is not modified.

    Returns:
        A new PIL image built from the thresholded array.
    """
    gray = np.array(img.convert("L"))
    # cv2.resize returns the scaled array instead of resizing in place, so the
    # result must be kept; otherwise the upscaling step is silently lost.
    gray = cv2.resize(
        gray, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC
    )
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    return Image.fromarray(thresh)
def calculate_ocr(image_path):
import pytesseract
img = Image.open(image_path)
text = pytesseract.image_to_string(img)
estimated_sqm = extract_total_sqm(text)
@ -53,7 +52,9 @@ def calculate_ocr(image_path):
text2 = pytesseract.image_to_string(improved_img)
estimated_sqm2 = extract_total_sqm(text2)
with open("recalculating.log", "a") as f:
f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n")
f.write(
f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n"
)
return estimated_sqm2, text2
return estimated_sqm, text