refactor detect floorplan to use model listings

This commit is contained in:
Viktor Barzin 2025-06-07 14:30:32 +00:00
parent ba87d07cd2
commit 325823e631
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 37 additions and 15 deletions

View file

@ -1,21 +1,43 @@
import asyncio
from data_access import Listing
from models import Listing
from rec import floorplan
from repositories.listing_repository import ListingRepository
from tqdm.asyncio import tqdm
import multiprocessing
async def detect_floorplan(repository: ListingRepository) -> None:
    """Fill in missing floor areas from floorplan images and persist them.

    Loads every listing from ``repository``, runs ``_calculate_sqm_ocr`` on
    each one concurrently (with a progress bar), and upserts only the
    listings for which that helper returned an updated object.

    Args:
        repository: Source of the listings and destination for the updates.
    """
    listings = await repository.get_listings()

    # Use a quarter of the available cores, but never fewer than one slot:
    # ``asyncio.Semaphore(0)`` would make every ``async with semaphore`` wait
    # forever on machines with fewer than four cores.
    max_concurrency = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(max_concurrency)

    results = await tqdm.gather(
        *[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
    )
    # The helper returns None for listings it did not change.
    updated_listings = [listing for listing in results if listing is not None]
    await repository.upsert_listings(updated_listings)
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Estimate a listing's floor area from its floorplan images.

    Listings that already have ``square_meters`` set are left untouched.
    Otherwise ``floorplan.calculate_ocr`` is run on each path in
    ``listing.floorplan_image_paths`` in a worker thread, and the largest
    non-None estimate is stored on the listing.

    Args:
        listing: The listing to inspect and, if possible, update in place.
        semaphore: Limits how many OCR calls run at the same time.

    Returns:
        The mutated listing when an estimate was found; ``None`` when the
        listing already had a value or no image yielded an estimate.
    """
    if listing.square_meters is not None:
        return None

    estimates = []
    for image_path in listing.floorplan_image_paths:
        # Hold a semaphore slot only for the duration of the OCR call.
        async with semaphore:
            sqm, _ignored = await asyncio.to_thread(
                floorplan.calculate_ocr, image_path
            )
        if sqm is not None:
            estimates.append(sqm)

    if not estimates:
        # No image produced a usable value; keep the field explicitly unset.
        listing.square_meters = None
        return None

    listing.square_meters = max(estimates)
    return listing

View file

@ -165,7 +165,7 @@ def dump_listings(
@click.pass_context
def dump_images(ctx: click.core.Context):
data_dir = ctx.obj["data_dir"]
click.echo(f"Running dump_images stored in {data_dir}")
click.echo(f"Running dump_images for listings stored in {engine.url}")
repository = ListingRepository(engine=engine)
asyncio.run(dump_images_module.dump_images(repository, image_base_path=data_dir))
@ -174,9 +174,9 @@ def dump_images(ctx: click.core.Context):
@click.pass_context
def detect_floorplan(ctx: click.core.Context):
data_dir = ctx.obj["data_dir"]
click.echo(f"Running detect_floorplan in {data_dir}")
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
asyncio.run(detect_floorplan_module.detect_floorplan(listing_paths))
click.echo(f"Running detect_floorplan for listings stored in {engine.url}")
repository = ListingRepository(engine=engine)
asyncio.run(detect_floorplan_module.detect_floorplan(repository))
@cli.command()

View file

@ -44,7 +44,7 @@ def improve_img_for_ocr(img: Image):
return Image.fromarray(thresh)
def calculate_ocr(image_path):
def calculate_ocr(image_path) -> tuple[float | None, str]:
import pytesseract
img = Image.open(image_path)