Defer the transformers and pytesseract imports until they are actually used. This shortens the startup time of all other commands considerably.

This commit is contained in:
Viktor Barzin 2025-05-21 21:24:57 +00:00
parent 10ae25e0d3
commit 1e0f302178
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863

View file

@ -2,11 +2,10 @@ import re
from PIL import Image from PIL import Image
import cv2 import cv2
import numpy as np import numpy as np
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
import pytesseract
def inference(image_path): def inference(image_path):
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
image = Image.open(image_path) image = Image.open(image_path)
question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect question = "How many living rooms are displayed on this floor plan?" # not sure if it even has an effect
processor = Pix2StructProcessor.from_pretrained("google/deplot") processor = Pix2StructProcessor.from_pretrained("google/deplot")
@ -38,13 +37,13 @@ def calculate_model(image_path):
def improve_img_for_ocr(img: Image.Image) -> Image.Image:
    """Preprocess an image to make its text easier for OCR to read.

    The image is converted to 8-bit grayscale, upscaled by a factor of 1.2
    with bicubic interpolation, and binarized with Gaussian adaptive
    thresholding (block size 11, constant 2).

    Args:
        img: The PIL image to preprocess.

    Returns:
        A new binarized grayscale PIL image, 1.2x the size of the input.
    """
    gray = np.array(img.convert("L"))
    # cv2.resize returns the scaled array rather than modifying its input,
    # so the result must be kept for the upscaling to take effect.
    gray = cv2.resize(gray, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    return Image.fromarray(thresh)
def calculate_ocr(image_path): def calculate_ocr(image_path):
import pytesseract
img = Image.open(image_path) img = Image.open(image_path)
text = pytesseract.image_to_string(img) text = pytesseract.image_to_string(img)
estimated_sqm = extract_total_sqm(text) estimated_sqm = extract_total_sqm(text)
@ -53,7 +52,9 @@ def calculate_ocr(image_path):
text2 = pytesseract.image_to_string(improved_img) text2 = pytesseract.image_to_string(improved_img)
estimated_sqm2 = extract_total_sqm(text2) estimated_sqm2 = extract_total_sqm(text2)
with open("recalculating.log", "a") as f: with open("recalculating.log", "a") as f:
f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n") f.write(
f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n"
)
return estimated_sqm2, text2 return estimated_sqm2, text2
return estimated_sqm, text return estimated_sqm, text