Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
- Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
- Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
- Consolidate duplicate filter methods in listing_repository
- Move hardcoded config to env vars with backward-compatible defaults
- Simplify CLI decorator to auto-build QueryParameters
- Add deprecation docstring to data_access.py
- Test count: 158 → 387 (all passing)
This commit is contained in:
parent
7e05b3c971
commit
150342bb9e
48 changed files with 5029 additions and 990 deletions
|
|
@ -6,12 +6,14 @@ from repositories.listing_repository import ListingRepository
|
|||
from tqdm.asyncio import tqdm
|
||||
import multiprocessing
|
||||
|
||||
# Use a quarter of available CPUs to avoid starving other processes
|
||||
MAX_OCR_WORKERS = max(1, multiprocessing.cpu_count() // 4)
|
||||
|
||||
|
||||
async def detect_floorplan(repository: ListingRepository) -> None:
|
||||
"""Detect square meters from floorplan images for all listings."""
|
||||
listings = await repository.get_listings()
|
||||
cpu_count = multiprocessing.cpu_count() // 4
|
||||
semaphore = asyncio.Semaphore(cpu_count)
|
||||
semaphore = asyncio.Semaphore(MAX_OCR_WORKERS)
|
||||
|
||||
updated_listings = [
|
||||
listing
|
||||
|
|
@ -29,6 +31,9 @@ async def _calculate_sqm_ocr(
|
|||
"""Calculate square meters from floorplan images using OCR."""
|
||||
if listing.square_meters is not None:
|
||||
return None
|
||||
if not listing.floorplan_image_paths:
|
||||
listing.square_meters = 0
|
||||
return listing
|
||||
sqms: list[float] = []
|
||||
for floorplan_path in listing.floorplan_image_paths:
|
||||
async with semaphore:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue