The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
104 lines
3.9 KiB
Python
104 lines
3.9 KiB
Python
"""Unit tests for rec/floorplan.py."""
|
|
from unittest.mock import patch
|
|
import numpy as np
|
|
from PIL import Image
|
|
import pytest
|
|
|
|
from rec.floorplan import extract_total_sqm, improve_img_for_ocr, calculate_ocr
|
|
|
|
|
|
class TestExtractTotalSqm:
    """Checks how ``extract_total_sqm`` pulls a square-metre figure out of text."""

    def test_normal_value(self) -> None:
        """A single decimal area in the text comes back as that float."""
        parsed = extract_total_sqm("Total area: 75.5 sq m")
        assert parsed == 75.5

    def test_multiple_values_returns_max_in_range(self) -> None:
        """With 20 and 65 both present, 65.0 is the expected result."""
        parsed = extract_total_sqm("Room 1: 20 sqm, Total: 65 sq m")
        assert parsed == 65.0

    def test_no_match_returns_none(self) -> None:
        """Text without any area figure yields ``None``."""
        parsed = extract_total_sqm("No area info")
        assert parsed is None

    def test_below_minimum_returns_none(self) -> None:
        """An area of 15 is expected to be rejected as too small."""
        parsed = extract_total_sqm("Area: 15 sq m")
        assert parsed is None

    def test_above_maximum_returns_none(self) -> None:
        """An area of 200 is expected to be rejected as too large."""
        parsed = extract_total_sqm("Area: 200 sq m")
        assert parsed is None

    def test_edge_just_above_min(self) -> None:
        """30.1 sits just inside the lower limit and is accepted."""
        parsed = extract_total_sqm("Area: 30.1 sq m")
        assert parsed == 30.1

    def test_edge_just_below_max(self) -> None:
        """159.9 sits just inside the upper limit and is accepted."""
        parsed = extract_total_sqm("Area: 159.9 sq m")
        assert parsed == 159.9

    def test_exactly_at_min_boundary_returns_none(self) -> None:
        """Exactly 30 is rejected by this test's expectation."""
        # The lower bound is exclusive (MIN_SQM < sqm): 30 is not
        # strictly greater than 30, so nothing should be returned.
        parsed = extract_total_sqm("Area: 30 sq m")
        assert parsed is None

    def test_exactly_at_max_boundary_returns_none(self) -> None:
        """Exactly 160 is rejected by this test's expectation."""
        # The upper bound is exclusive (sqm < MAX_SQM): 160 is not
        # strictly less than 160, so nothing should be returned.
        parsed = extract_total_sqm("Area: 160 sq m")
        assert parsed is None

    def test_format_sq_dot_m(self) -> None:
        """The ``sq. m`` spelling of the unit is recognised."""
        parsed = extract_total_sqm("Area: 80 sq. m")
        assert parsed == 80.0

    def test_format_sqm_no_space(self) -> None:
        """The compact ``80sqm`` form (no spaces at all) is recognised."""
        parsed = extract_total_sqm("Area: 80sqm")
        assert parsed == 80.0

    def test_format_sq_m_with_space(self) -> None:
        """The ``sq m`` spelling of the unit is recognised."""
        parsed = extract_total_sqm("Area: 80 sq m")
        assert parsed == 80.0

    def test_empty_string(self) -> None:
        """An empty input string yields ``None``."""
        parsed = extract_total_sqm("")
        assert parsed is None

    def test_multiple_valid_values_returns_max(self) -> None:
        """When 40 and 100 both appear, the larger one (100.0) is expected."""
        parsed = extract_total_sqm("Living: 40 sq m, Total: 100 sq m")
        assert parsed == 100.0
|
|
|
|
|
|
class TestImproveImgForOcr:
    """Checks the type, mode and size of what ``improve_img_for_ocr`` returns."""

    def test_produces_valid_pil_image(self) -> None:
        """The output is a PIL image whose mode is ``"L"``."""
        # Small uniform 50x50 RGB input with every channel set to 200.
        pixels = np.full((50, 50, 3), 200, dtype=np.uint8)
        source = Image.fromarray(pixels)

        processed = improve_img_for_ocr(source)

        assert isinstance(processed, Image.Image)
        # The function is expected to hand back a grayscale
        # (thresholded) image, i.e. PIL mode "L".
        assert processed.mode == "L"

    def test_output_dimensions_scaled(self) -> None:
        """A 100x100 input is expected to come back as 120x120."""
        pixels = np.full((100, 100, 3), 128, dtype=np.uint8)
        source = Image.fromarray(pixels)

        processed = improve_img_for_ocr(source)

        # After the 1.2x resize each side grows from 100 to 120.
        width, height = processed.size
        assert width == 120
        assert height == 120
|
|
|
|
|
|
class TestCalculateOcr:
    """Exercises ``calculate_ocr`` with ``pytesseract.image_to_string`` patched."""

    def test_invalid_path_raises_file_not_found(self) -> None:
        """A path that does not exist raises ``FileNotFoundError``."""
        missing_path = "/nonexistent/path/to/image.png"
        with pytest.raises(FileNotFoundError):
            calculate_ocr(missing_path)

    def test_returns_sqm_from_first_pass(self, tmp_path) -> None:  # type: ignore[no-untyped-def]
        """When OCR text already contains an area, it is returned with the text."""
        # A real file has to exist on disk so the path check passes.
        png_path = str(tmp_path / "test.png")
        pixels = np.ones((10, 10, 3), dtype=np.uint8)
        Image.fromarray(pixels).save(png_path)

        ocr_text = "Total: 85 sq m"
        with patch("pytesseract.image_to_string", return_value=ocr_text):
            sqm, text = calculate_ocr(png_path)

        assert sqm == 85.0
        assert text == ocr_text

    def test_falls_back_to_improved_image(self, tmp_path) -> None:  # type: ignore[no-untyped-def]
        """If the first OCR result has no area, the second result is used."""
        png_path = str(tmp_path / "test.png")
        pixels = np.ones((10, 10, 3), dtype=np.uint8)
        Image.fromarray(pixels).save(png_path)

        # Successive OCR calls: the first yields text without any area,
        # the second (made on the improved image) yields a usable value.
        ocr_results = [
            "No area info here",
            "Total: 72 sq m",
        ]
        with patch("pytesseract.image_to_string", side_effect=ocr_results):
            sqm, text = calculate_ocr(png_path)

        assert sqm == 72.0
        assert text == "Total: 72 sq m"
|