Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@@ -0,0 +1,104 @@
"""Unit tests for rec/floorplan.py."""
from pathlib import Path
from unittest.mock import patch

import numpy as np
import pytest
from PIL import Image

from rec.floorplan import extract_total_sqm, improve_img_for_ocr, calculate_ocr
class TestExtractTotalSqm:
    """Exercise ``extract_total_sqm`` with matching, non-matching and boundary text."""

    def test_normal_value(self) -> None:
        """A single decimal area followed by "sq m" comes back as a float."""
        area = extract_total_sqm("Total area: 75.5 sq m")
        assert area == 75.5

    def test_multiple_values_returns_max_in_range(self) -> None:
        """With a too-small 20 and an accepted 65 in the text, 65 is returned."""
        area = extract_total_sqm("Room 1: 20 sqm, Total: 65 sq m")
        assert area == 65.0

    def test_no_match_returns_none(self) -> None:
        """Text without any area figure yields ``None``."""
        area = extract_total_sqm("No area info")
        assert area is None

    def test_below_minimum_returns_none(self) -> None:
        """An area of 15 is rejected as too small."""
        area = extract_total_sqm("Area: 15 sq m")
        assert area is None

    def test_above_maximum_returns_none(self) -> None:
        """An area of 200 is rejected as too large."""
        area = extract_total_sqm("Area: 200 sq m")
        assert area is None

    def test_edge_just_above_min(self) -> None:
        """30.1 sits just inside the accepted range."""
        area = extract_total_sqm("Area: 30.1 sq m")
        assert area == 30.1

    def test_edge_just_below_max(self) -> None:
        """159.9 sits just inside the accepted range."""
        area = extract_total_sqm("Area: 159.9 sq m")
        assert area == 159.9

    def test_exactly_at_min_boundary_returns_none(self) -> None:
        """Exactly 30 is rejected: the lower bound is exclusive."""
        # The check is MIN_SQM < sqm, and 30 is not strictly greater than 30.
        area = extract_total_sqm("Area: 30 sq m")
        assert area is None

    def test_exactly_at_max_boundary_returns_none(self) -> None:
        """Exactly 160 is rejected: the upper bound is exclusive."""
        # The check is sqm < MAX_SQM, and 160 is not strictly less than 160.
        area = extract_total_sqm("Area: 160 sq m")
        assert area is None

    def test_format_sq_dot_m(self) -> None:
        """The "sq. m" spelling of the unit is recognised."""
        area = extract_total_sqm("Area: 80 sq. m")
        assert area == 80.0

    def test_format_sqm_no_space(self) -> None:
        """The unit may follow the number directly as "sqm"."""
        area = extract_total_sqm("Area: 80sqm")
        assert area == 80.0

    def test_format_sq_m_with_space(self) -> None:
        """The "sq m" spelling of the unit is recognised."""
        area = extract_total_sqm("Area: 80 sq m")
        assert area == 80.0

    def test_empty_string(self) -> None:
        """An empty input yields ``None``."""
        area = extract_total_sqm("")
        assert area is None

    def test_multiple_valid_values_returns_max(self) -> None:
        """When two accepted values are present, the larger one is returned."""
        area = extract_total_sqm("Living: 40 sq m, Total: 100 sq m")
        assert area == 100.0
class TestImproveImgForOcr:
    """Check the type, mode and size of the image from ``improve_img_for_ocr``."""

    def test_produces_valid_pil_image(self) -> None:
        """A small uniform input yields a PIL image in mode "L"."""
        # 50x50 three-channel image with every sample set to 200.
        pixels = np.full((50, 50, 3), 200, dtype=np.uint8)
        enhanced = improve_img_for_ocr(Image.fromarray(pixels))
        assert isinstance(enhanced, Image.Image)
        # The processed image is expected to be single-channel grayscale.
        assert enhanced.mode == "L"

    def test_output_dimensions_scaled(self) -> None:
        """A 100x100 input is expected to come back as 120x120."""
        pixels = np.full((100, 100, 3), 128, dtype=np.uint8)
        enhanced = improve_img_for_ocr(Image.fromarray(pixels))
        # Expected size reflects a 1.2x resize of each side: 100 -> 120.
        width, height = enhanced.size
        assert width == 120
        assert height == 120
class TestCalculateOcr:
    """Tests for ``calculate_ocr`` with ``pytesseract.image_to_string`` mocked out."""

    @staticmethod
    def _write_test_image(directory: Path) -> str:
        """Save a 10x10 three-channel PNG in *directory* and return its path.

        A real file is written so that the path check in ``calculate_ocr``
        passes; the pixel content is irrelevant because OCR is mocked.
        """
        image_file = directory / "test.png"
        Image.fromarray(np.ones((10, 10, 3), dtype=np.uint8)).save(str(image_file))
        return str(image_file)

    def test_invalid_path_raises_file_not_found(self) -> None:
        """A path that does not exist raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError):
            calculate_ocr("/nonexistent/path/to/image.png")

    def test_returns_sqm_from_first_pass(self, tmp_path: Path) -> None:
        """When OCR text already contains an area, that value and text are returned."""
        image_path = self._write_test_image(tmp_path)
        with patch("pytesseract.image_to_string", return_value="Total: 85 sq m"):
            result_sqm, result_text = calculate_ocr(image_path)
        assert result_sqm == 85.0
        assert result_text == "Total: 85 sq m"

    def test_falls_back_to_improved_image(self, tmp_path: Path) -> None:
        """When the first OCR text has no area, the second OCR result is used."""
        image_path = self._write_test_image(tmp_path)
        # First OCR call yields no area; the second call (presumably on the
        # improved image, per the test name) yields a valid one.
        ocr_outputs = ["No area info here", "Total: 72 sq m"]
        with patch("pytesseract.image_to_string", side_effect=ocr_outputs):
            result_sqm, result_text = calculate_ocr(image_path)
        assert result_sqm == 72.0
        assert result_text == "Total: 72 sq m"