wrongmove/crawler/4_detect_floorplan.py

44 lines
1.3 KiB
Python
Raw Normal View History

2025-05-17 22:58:35 +00:00
import asyncio
from models import Listing
from rec import floorplan
from repositories.listing_repository import ListingRepository
2025-05-17 22:58:35 +00:00
from tqdm.asyncio import tqdm
import multiprocessing
async def detect_floorplan(repository: ListingRepository):
    """Fill in missing square-metre values from floorplan images and save them.

    Loads all listings from ``repository``, runs ``_calculate_sqm_ocr`` on
    each of them concurrently, and upserts only the listings for which that
    helper returned an updated listing (it returns ``None`` otherwise).

    Args:
        repository: Source of the listings and destination for the updates.
    """
    listings = await repository.get_listings()

    # Limit concurrent OCR jobs to a quarter of the available cores, but never
    # fewer than one: asyncio.Semaphore(0) can never be acquired, so on a
    # machine with fewer than four cores every task would wait forever.
    max_concurrency = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(max_concurrency)

    results = await tqdm.gather(
        *[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
    )
    updated_listings = [listing for listing in results if listing is not None]
    await repository.upsert_listings(updated_listings)
2025-05-17 22:58:35 +00:00
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Estimate a listing's area by running OCR over its floorplan images.

    Listings that already have ``square_meters`` set are skipped. Otherwise
    each path in ``listing.floorplan_image_paths`` is passed to
    ``floorplan.calculate_ocr`` in a worker thread, one at a time, while
    holding ``semaphore``. The first element of each result is treated as the
    estimate, and the largest non-``None`` estimate is stored on the listing.

    Args:
        listing: The listing to update in place.
        semaphore: Shared limiter bounding how many OCR calls run at once.

    Returns:
        The updated listing when an estimate was found; ``None`` when the
        listing already had a value or no image produced an estimate.
    """
    # Nothing to do for listings that already carry a value.
    if listing.square_meters is not None:
        return None

    estimates = []
    for image_path in listing.floorplan_image_paths:
        # Only the OCR call itself is throttled by the shared semaphore.
        async with semaphore:
            sqm, _unused = await asyncio.to_thread(
                floorplan.calculate_ocr, image_path
            )
        if sqm is not None:
            estimates.append(sqm)

    # No usable estimate: leave the field empty and report "no update".
    if not estimates:
        listing.square_meters = None
        return None

    listing.square_meters = max(estimates)
    return listing