wrongmove/crawler/services/floorplan_detector.py
Viktor Barzin 150342bb9e
Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
2026-02-07 20:19:57 +00:00

47 lines
1.6 KiB
Python

"""Floorplan detector service - OCR-based square meter detection."""
import asyncio
from models import Listing
from rec import floorplan
from repositories.listing_repository import ListingRepository
from tqdm.asyncio import tqdm
import multiprocessing
# Cap concurrent OCR jobs at one quarter of the reported CPU count (never
# below one) so that OCR does not starve other processes on the machine.
MAX_OCR_WORKERS = max(multiprocessing.cpu_count() // 4, 1)
async def detect_floorplan(repository: ListingRepository) -> None:
    """Run floorplan OCR over every stored listing and persist the results.

    Loads all listings from ``repository``, hands each one to
    ``_calculate_sqm_ocr`` and awaits the whole batch through
    ``tqdm.gather``. Entries for which the helper returned ``None`` are
    dropped; the remaining listings are written back with a single
    ``upsert_listings`` call (which is made even when nothing changed).

    Args:
        repository: Source of the listings and sink for the updated ones.
    """
    all_listings = await repository.get_listings()

    # A single semaphore is shared by every task so that the number of OCR
    # calls in flight never exceeds MAX_OCR_WORKERS.
    ocr_slots = asyncio.Semaphore(MAX_OCR_WORKERS)

    pending = [_calculate_sqm_ocr(item, ocr_slots) for item in all_listings]
    outcomes = await tqdm.gather(*pending)

    # None means the helper had nothing to update for that listing.
    changed = [outcome for outcome in outcomes if outcome is not None]
    await repository.upsert_listings(changed)
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Fill in ``listing.square_meters`` from the listing's floorplan images.

    Args:
        listing: Listing to inspect; it is mutated in place when updated.
        semaphore: Shared limiter; one slot is held for the duration of each
            individual OCR call.

    Returns:
        ``None`` when ``square_meters`` is already set (nothing to do),
        otherwise the same ``listing`` object with ``square_meters`` assigned.
        The assigned value is the largest estimate across all floorplan
        images, or 0 when there are no images or none produced an estimate.
    """
    already_measured = listing.square_meters is not None
    if already_measured:
        return None

    image_paths = listing.floorplan_image_paths
    if not image_paths:
        listing.square_meters = 0
        return listing

    estimates: list[float] = []
    for image_path in image_paths:
        # OCR is a blocking call, so it runs in a worker thread; the
        # semaphore slot is held only while that one call is in progress.
        async with semaphore:
            ocr_result = await asyncio.to_thread(
                floorplan.calculate_ocr, image_path
            )
            area, _ = ocr_result
        if area is None:
            continue
        estimates.append(area)

    # Only one attempt is made: with no usable estimate the value becomes 0,
    # which is not None, so the early return above skips this listing if it
    # is passed in again.
    listing.square_meters = max(estimates, default=0)
    return listing