wrongmove/crawler/4_detect_floorplan.py
2025-05-17 22:58:35 +00:00

31 lines
800 B
Python

import asyncio
import pathlib
from data_access import Listing
from tqdm.asyncio import tqdm
import multiprocessing
async def detect_floorplan(listing_paths: list[str]):
    """Run the floorplan OCR step for every listing with bounded concurrency.

    Loads the listings through ``Listing.get_all_listings`` and awaits
    ``calculate_sqm_ocr(recalculate=False)`` on each of them, displaying a
    tqdm progress bar while the tasks complete.

    Args:
        listing_paths: Paths of the listing files; forwarded unchanged to
            ``Listing.get_all_listings``.
    """
    listings = Listing.get_all_listings(listing_paths)

    # The semaphore counter must be a whole number: true division produced
    # a float (e.g. 1.5 on a 6-core machine) that never lands exactly on
    # zero, which makes the limit unreliable.  Floor-divide instead, and
    # clamp to 1 so machines with fewer than 4 cores do not end up with a
    # zero-valued semaphore that would block every task forever.
    max_concurrent = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(max_concurrent)

    await tqdm.gather(
        *(
            _detect_floorplan_with_semaphore(listing, semaphore)
            for listing in listings
        )
    )
async def _detect_floorplan_with_semaphore(
    listing: Listing, semaphore: asyncio.Semaphore
):
    """Await ``listing.calculate_sqm_ocr(recalculate=False)`` under *semaphore*.

    The semaphore is acquired before the call and released afterwards,
    including when the call raises.

    Args:
        listing: Object whose ``calculate_sqm_ocr`` coroutine is awaited.
        semaphore: Shared semaphore bounding how many calls run at once.

    Returns:
        Whatever ``calculate_sqm_ocr`` returns.
    """
    await semaphore.acquire()
    try:
        outcome = await listing.calculate_sqm_ocr(recalculate=False)
    finally:
        # Always hand the slot back so other waiting tasks can proceed.
        semaphore.release()
    return outcome
def main():
    """Collect every ``data/rs/*/listing.json`` file and run floorplan detection.

    The matching paths are sorted so listings are processed in a stable
    order, then ``detect_floorplan`` is driven to completion on a fresh
    asyncio event loop.
    """
    # NOTE(review): these are pathlib.Path objects, while detect_floorplan
    # annotates its argument as list[str] -- confirm which one
    # Listing.get_all_listings expects.
    listing_paths = sorted(pathlib.Path("data/rs").glob("*/listing.json"))

    # detect_floorplan is a coroutine function: calling it only creates a
    # coroutine object.  It has to be run by an event loop, otherwise
    # nothing is executed at all.
    asyncio.run(detect_floorplan(listing_paths))


# Only run the pipeline step when this file is executed as a script.
if __name__ == "__main__":
    main()