detect floorplan using asyncio

This commit is contained in:
Viktor Barzin 2025-05-17 22:58:35 +00:00
parent 68cc70bd11
commit b1e0ed170b
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 22 additions and 9 deletions

View file

@ -1,15 +1,25 @@
import asyncio
import pathlib
from data_access import Listing
from tqdm import tqdm
from tqdm.asyncio import tqdm
import multiprocessing
async def detect_floorplan(listing_paths: list[str]):
    """Run OCR-based square-metre detection for all listings concurrently.

    Listings are loaded with ``Listing.get_all_listings`` and each one is
    handed to ``_detect_floorplan_with_semaphore``, which awaits
    ``listing.calculate_sqm_ocr(recalculate=False)``. The number of listings
    processed at the same time is capped at a quarter of the CPU count (but
    never fewer than one), and progress is displayed through ``tqdm.gather``.

    Args:
        listing_paths: Paths forwarded to ``Listing.get_all_listings``.
    """
    listings = Listing.get_all_listings(listing_paths)
    # asyncio.Semaphore expects an integer counter. True division yields a
    # float, and a fractional value (e.g. 6 cores / 4 == 1.5, or 2 / 4 == 0.5)
    # never reaches exactly zero while being decremented; depending on the
    # Python version the limit is then rounded up or not enforced at all.
    # Floor-divide and clamp so there is always at least one slot.
    max_concurrent = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(max_concurrent)
    await tqdm.gather(
        *[_detect_floorplan_with_semaphore(listing, semaphore) for listing in listings]
    )
async def _detect_floorplan_with_semaphore(
    listing: Listing, semaphore: asyncio.Semaphore
):
    """Run OCR square-metre detection for one listing under a semaphore.

    Waits for a free slot on ``semaphore``, awaits
    ``listing.calculate_sqm_ocr(recalculate=False)`` while holding it, and
    returns whatever that call returns. The slot is released when the call
    finishes or raises.
    """
    async with semaphore:
        ocr_result = await listing.calculate_sqm_ocr(recalculate=False)
        return ocr_result
def main():

View file

@ -1,3 +1,4 @@
import asyncio
from dataclasses import dataclass
import json
import pathlib
@ -135,13 +136,15 @@ class Listing:
) # filter out Nones
return max_sqm
def calculate_sqm_ocr(self, recalculate=True):
async def calculate_sqm_ocr(self, recalculate=True):
if not recalculate and self.path_floorplan_ocr_json().exists():
return
objs = []
for floorplan_path in self.list_floorplans():
estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
estimated_sqm, model_output = await asyncio.to_thread(
floorplan.calculate_ocr, floorplan_path
)
objs.append(
{
"floorplan_path": str(floorplan_path),

View file

@ -135,7 +135,7 @@ def detect_floorplan(ctx: click.core.Context):
data_dir = ctx.obj['data_dir']
click.echo(f'Running detect_floorplan in {data_dir}')
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
detect_floorplan_module.detect_floorplan(listing_paths)
asyncio.run(detect_floorplan_module.detect_floorplan(listing_paths))
@cli.command()