"""Fill in missing listing sizes by running OCR over floorplan images."""

import asyncio
import multiprocessing

from tqdm.asyncio import tqdm

from models import Listing
from rec import floorplan
from repositories.listing_repository import ListingRepository


async def detect_floorplan(repository: ListingRepository):
    """Estimate square meters for listings that lack them and persist the result.

    Loads every listing from ``repository``, runs ``_calculate_sqm_ocr`` on each
    one concurrently (with a progress bar), and upserts only the listings that
    were actually updated.

    Args:
        repository: Source of the listings and sink for the updated ones.
    """
    listings = await repository.get_listings()

    # Use roughly a quarter of the cores for OCR, but never fewer than one:
    # on hosts with < 4 CPUs the integer division yields 0, and a semaphore
    # created with 0 permits would block every task forever.
    max_concurrent_ocr = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(max_concurrent_ocr)

    results = await tqdm.gather(
        *(_calculate_sqm_ocr(listing, semaphore) for listing in listings)
    )
    # ``None`` marks listings that already had a size and were left untouched.
    updated_listings = [listing for listing in results if listing is not None]
    await repository.upsert_listings(updated_listings)


async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Set ``listing.square_meters`` from its floorplan images via OCR.

    Each path in ``listing.floorplan_image_paths`` is passed to
    ``floorplan.calculate_ocr`` in a worker thread; the semaphore bounds how
    many of those calls run at the same time across all listings.

    Args:
        listing: Listing to update in place.
        semaphore: Shared limiter for concurrent OCR calls.

    Returns:
        The same ``listing`` with ``square_meters`` set to the largest estimate
        found (``0`` when no image produced one), or ``None`` when the listing
        already had a ``square_meters`` value and was skipped.
    """
    if listing.square_meters is not None:
        return None

    estimates = []
    for floorplan_path in listing.floorplan_image_paths:
        # Hold the permit only for the duration of a single OCR call.
        async with semaphore:
            estimated_sqm, _ = await asyncio.to_thread(
                floorplan.calculate_ocr, floorplan_path
            )
        if estimated_sqm is not None:
            estimates.append(estimated_sqm)

    # Try once; if no floorplan yields an estimate, store 0 so the listing is
    # no longer ``None`` and is skipped on subsequent runs.
    listing.square_meters = max(estimates, default=0)
    return listing