Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
This commit is contained in:
parent
e2247be700
commit
eafbc1ac52
221 changed files with 70 additions and 146140 deletions
67
rec/floorplan.py
Normal file
67
rec/floorplan.py
Normal file
|
|
@@ -0,0 +1,67 @@
|
|||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from PIL import Image
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# Module-level logger, named after this module.
logger: logging.Logger = logging.getLogger(__name__)

# Exclusive lower/upper bounds applied to square-metre values parsed from text:
# extract_total_sqm() keeps only values strictly between the two.
MIN_SQM: int = 30
MAX_SQM: int = 160
|
||||
|
||||
|
||||
def inference(image_path: str | Path) -> tuple[str, Any]:
    """Run the ``google/deplot`` Pix2Struct model on an image.

    The processor and the model are loaded with ``from_pretrained`` on every
    call.

    Args:
        image_path: Location of the image file to feed to the model.

    Returns:
        A ``(output, predictions)`` tuple: ``output`` is the decoded text of
        the first generated sequence (special tokens skipped) and
        ``predictions`` is the raw value returned by ``model.generate``.
    """
    # Imported inside the function, so transformers is only loaded when
    # inference() is actually called.
    from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

    question = "How many living rooms are displayed on this floor plan?"  # not sure if it even has an effect

    # Image.open() keeps the underlying file open; the context manager
    # releases it as soon as the processor has produced the model inputs.
    with Image.open(image_path) as image:
        processor = Pix2StructProcessor.from_pretrained("google/deplot")
        inputs = processor(images=image, text=question, return_tensors="pt")

    model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
    predictions = model.generate(**inputs, max_new_tokens=512)
    output = processor.decode(predictions[0], skip_special_tokens=True)

    return output, predictions
|
||||
|
||||
|
||||
def extract_total_sqm(input_str: str) -> float | None:
    """Return the largest plausible square-metre figure found in *input_str*.

    The text is lower-cased and scanned for a number followed by an
    ``sq m`` / ``sqm`` / ``sq.m`` style unit. Only values strictly between
    ``MIN_SQM`` and ``MAX_SQM`` are considered.

    Args:
        input_str: Free text to search.

    Returns:
        The maximum in-range value, or ``None`` when nothing qualifies.
    """
    # Note: can be used on the output of inference() to extract sqm from model predictions.
    # NOTE(review): the "." in the second alternative is unescaped, so it
    # matches any single character, not just a literal dot - confirm intent.
    pattern = r"(\d+\.?\d*) ?(sq ?m|sq. ?m)"

    candidates = (
        float(number) for number, _unit in re.findall(pattern, input_str.lower())
    )
    in_range = [value for value in candidates if MIN_SQM < value < MAX_SQM]

    return max(in_range) if in_range else None
|
||||
|
||||
|
||||
def improve_img_for_ocr(img: Image.Image) -> Image.Image:
    """Return a preprocessed copy of *img* for a second OCR attempt.

    The image is converted to grayscale (PIL mode ``"L"``), enlarged by a
    factor of 1.2 with bicubic interpolation, and binarised with a Gaussian
    adaptive threshold.

    Args:
        img: Source PIL image.

    Returns:
        A new PIL image built from the thresholded array.
    """
    scale = 1.2
    block_size = 11  # neighbourhood size passed to adaptiveThreshold
    offset = 2  # constant "C" passed to adaptiveThreshold

    gray = np.array(img.convert("L"))
    enlarged = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    binarised = cv2.adaptiveThreshold(
        enlarged,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )
    return Image.fromarray(binarised)
|
||||
|
||||
|
||||
def calculate_ocr(image_path: str | Path) -> tuple[float | None, str]:
    """Estimate a square-metre figure from an image using Tesseract OCR.

    OCR is first run on the image as-is. If extract_total_sqm() finds no
    value in that text, the image is preprocessed with improve_img_for_ocr()
    and OCR is run a second time.

    Args:
        image_path: Location of the image file.

    Returns:
        ``(estimated_sqm, text)`` from the first pass when it yields a value;
        otherwise the estimate (possibly ``None``) and text of the second
        pass.

    Raises:
        FileNotFoundError: If *image_path* does not exist.
    """
    # Imported inside the function, so pytesseract is only loaded when
    # calculate_ocr() is actually called.
    import pytesseract

    path = Path(image_path)
    if not path.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Image.open() keeps the file handle open; the context manager closes it
    # once both OCR passes are finished with the image.
    with Image.open(path) as img:
        text = pytesseract.image_to_string(img)
        estimated_sqm = extract_total_sqm(text)
        if estimated_sqm is not None:
            return estimated_sqm, text

        # First pass found nothing usable: retry on the preprocessed image.
        improved_img = improve_img_for_ocr(img)
        text2 = pytesseract.image_to_string(improved_img)

    estimated_sqm2 = extract_total_sqm(text2)
    logger.debug("before: %s after: %s - %s", estimated_sqm, estimated_sqm2, image_path)
    return estimated_sqm2, text2
|
||||
Loading…
Add table
Add a link
Reference in a new issue