Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
This commit is contained in:
parent
e2247be700
commit
eafbc1ac52
221 changed files with 70 additions and 146140 deletions
47
services/floorplan_detector.py
Normal file
47
services/floorplan_detector.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
"""Floorplan detector service - OCR-based square meter detection."""
|
||||
import asyncio
|
||||
from models import Listing
|
||||
from rec import floorplan
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from tqdm.asyncio import tqdm
|
||||
import multiprocessing
|
||||
|
||||
# Upper bound on concurrent OCR jobs: one quarter of the CPU count reported by
# multiprocessing (integer division), but never fewer than one, so that other
# processes on the machine are not starved.
MAX_OCR_WORKERS = max(multiprocessing.cpu_count() // 4, 1)
|
||||
|
||||
|
||||
async def detect_floorplan(repository: ListingRepository) -> None:
    """Estimate square meters for every listing and persist the results.

    Every listing returned by ``repository.get_listings()`` is handed to
    ``_calculate_sqm_ocr``; the resulting coroutines are awaited together via
    ``tqdm.gather``. Listings for which the helper returns ``None`` are
    dropped, and the remaining ones are written back with a single
    ``repository.upsert_listings`` call.

    Args:
        repository: Source of the listings and target of the upsert.
    """
    all_listings = await repository.get_listings()
    # A single semaphore is shared by all per-listing coroutines so the
    # number of simultaneous OCR calls is capped at MAX_OCR_WORKERS.
    ocr_slots = asyncio.Semaphore(MAX_OCR_WORKERS)

    pending = [_calculate_sqm_ocr(item, ocr_slots) for item in all_listings]
    outcomes = await tqdm.gather(*pending)

    # None marks a listing the helper chose not to modify.
    changed = [outcome for outcome in outcomes if outcome is not None]
    await repository.upsert_listings(changed)
|
||||
|
||||
|
||||
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Set ``listing.square_meters`` from OCR of its floorplan images.

    Args:
        listing: Listing to process; it is mutated in place.
        semaphore: Held around each individual OCR call.

    Returns:
        ``None`` (listing untouched) when ``square_meters`` is already set.
        Otherwise the same listing, with ``square_meters`` set to the largest
        non-``None`` estimate returned by ``floorplan.calculate_ocr`` across
        its floorplan images, or to 0 when there are no images or no estimate
        was produced.
    """
    if listing.square_meters is not None:
        return None

    estimates: list[float] = []
    # A missing or empty path collection skips the loop entirely, so the
    # listing falls through to the default of 0 below.
    for image_path in listing.floorplan_image_paths or ():
        # The OCR call is pushed to a worker thread; the semaphore is released
        # as soon as that call returns.
        async with semaphore:
            sqm_estimate, _unused = await asyncio.to_thread(
                floorplan.calculate_ocr, image_path
            )
        if sqm_estimate is None:
            continue
        estimates.append(sqm_estimate)

    # Storing 0 (rather than leaving None) means the guard at the top skips
    # this listing on later runs: OCR is attempted only once per listing.
    listing.square_meters = max(estimates, default=0)
    return listing
|
||||
Loading…
Add table
Add a link
Reference in a new issue