refactor detect floorplan to use model listings
This commit is contained in:
parent
ba87d07cd2
commit
325823e631
3 changed files with 37 additions and 15 deletions
|
|
@ -1,21 +1,43 @@
|
|||
import asyncio
|
||||
from data_access import Listing
|
||||
from models import Listing
|
||||
from rec import floorplan
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from tqdm.asyncio import tqdm
|
||||
import multiprocessing
|
||||
|
||||
|
||||
async def detect_floorplan(listing_paths: list[str]):
|
||||
listings = Listing.get_all_listings(listing_paths)
|
||||
async def detect_floorplan(repository: ListingRepository):
|
||||
listings = await repository.get_listings()
|
||||
cpu_count = multiprocessing.cpu_count() // 4
|
||||
semaphore = asyncio.Semaphore(cpu_count)
|
||||
|
||||
await tqdm.gather(
|
||||
*[_detect_floorplan_with_semaphore(listing, semaphore) for listing in listings]
|
||||
)
|
||||
updated_listings = [
|
||||
listing
|
||||
for listing in await tqdm.gather(
|
||||
*[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
|
||||
)
|
||||
if listing is not None
|
||||
]
|
||||
await repository.upsert_listings(updated_listings)
|
||||
|
||||
|
||||
async def _detect_floorplan_with_semaphore(
|
||||
async def _calculate_sqm_ocr(
|
||||
listing: Listing, semaphore: asyncio.Semaphore
|
||||
):
|
||||
async with semaphore:
|
||||
return await listing.calculate_sqm_ocr(recalculate=False)
|
||||
) -> Listing | None:
|
||||
if listing.square_meters is not None:
|
||||
return None
|
||||
sqms = []
|
||||
for floorplan_path in listing.floorplan_image_paths:
|
||||
async with semaphore:
|
||||
estimated_sqm, _ = await asyncio.to_thread(
|
||||
floorplan.calculate_ocr, floorplan_path
|
||||
)
|
||||
if estimated_sqm is not None:
|
||||
sqms.append(estimated_sqm)
|
||||
max_sqm = max(sqms, default=None)
|
||||
if max_sqm is not None:
|
||||
listing.square_meters = max_sqm
|
||||
return listing
|
||||
else:
|
||||
listing.square_meters = None
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ def dump_listings(
|
|||
@click.pass_context
|
||||
def dump_images(ctx: click.core.Context):
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running dump_images stored in {data_dir}")
|
||||
click.echo(f"Running dump_images for listings stored in {engine.url}")
|
||||
repository = ListingRepository(engine=engine)
|
||||
asyncio.run(dump_images_module.dump_images(repository, image_base_path=data_dir))
|
||||
|
||||
|
|
@ -174,9 +174,9 @@ def dump_images(ctx: click.core.Context):
|
|||
@click.pass_context
|
||||
def detect_floorplan(ctx: click.core.Context):
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running detect_floorplan in {data_dir}")
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
asyncio.run(detect_floorplan_module.detect_floorplan(listing_paths))
|
||||
click.echo(f"Running detect_floorplan for listings stored in {engine.url}")
|
||||
repository = ListingRepository(engine=engine)
|
||||
asyncio.run(detect_floorplan_module.detect_floorplan(repository))
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ def improve_img_for_ocr(img: Image):
|
|||
return Image.fromarray(thresh)
|
||||
|
||||
|
||||
def calculate_ocr(image_path):
|
||||
def calculate_ocr(image_path) -> tuple[float | None, str]:
|
||||
import pytesseract
|
||||
|
||||
img = Image.open(image_path)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue