refactor detect floorplan to use model listings
This commit is contained in:
parent
ba87d07cd2
commit
325823e631
3 changed files with 37 additions and 15 deletions
|
|
@ -1,21 +1,43 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from data_access import Listing
|
from models import Listing
|
||||||
|
from rec import floorplan
|
||||||
|
from repositories.listing_repository import ListingRepository
|
||||||
from tqdm.asyncio import tqdm
|
from tqdm.asyncio import tqdm
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
|
||||||
|
|
||||||
async def detect_floorplan(listing_paths: list[str]):
|
async def detect_floorplan(repository: ListingRepository):
|
||||||
listings = Listing.get_all_listings(listing_paths)
|
listings = await repository.get_listings()
|
||||||
cpu_count = multiprocessing.cpu_count() // 4
|
cpu_count = multiprocessing.cpu_count() // 4
|
||||||
semaphore = asyncio.Semaphore(cpu_count)
|
semaphore = asyncio.Semaphore(cpu_count)
|
||||||
|
|
||||||
await tqdm.gather(
|
updated_listings = [
|
||||||
*[_detect_floorplan_with_semaphore(listing, semaphore) for listing in listings]
|
listing
|
||||||
)
|
for listing in await tqdm.gather(
|
||||||
|
*[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
|
||||||
|
)
|
||||||
|
if listing is not None
|
||||||
|
]
|
||||||
|
await repository.upsert_listings(updated_listings)
|
||||||
|
|
||||||
|
|
||||||
async def _detect_floorplan_with_semaphore(
|
async def _calculate_sqm_ocr(
|
||||||
listing: Listing, semaphore: asyncio.Semaphore
|
listing: Listing, semaphore: asyncio.Semaphore
|
||||||
):
|
) -> Listing | None:
|
||||||
async with semaphore:
|
if listing.square_meters is not None:
|
||||||
return await listing.calculate_sqm_ocr(recalculate=False)
|
return None
|
||||||
|
sqms = []
|
||||||
|
for floorplan_path in listing.floorplan_image_paths:
|
||||||
|
async with semaphore:
|
||||||
|
estimated_sqm, _ = await asyncio.to_thread(
|
||||||
|
floorplan.calculate_ocr, floorplan_path
|
||||||
|
)
|
||||||
|
if estimated_sqm is not None:
|
||||||
|
sqms.append(estimated_sqm)
|
||||||
|
max_sqm = max(sqms, default=None)
|
||||||
|
if max_sqm is not None:
|
||||||
|
listing.square_meters = max_sqm
|
||||||
|
return listing
|
||||||
|
else:
|
||||||
|
listing.square_meters = None
|
||||||
|
return None
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@ def dump_listings(
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def dump_images(ctx: click.core.Context):
|
def dump_images(ctx: click.core.Context):
|
||||||
data_dir = ctx.obj["data_dir"]
|
data_dir = ctx.obj["data_dir"]
|
||||||
click.echo(f"Running dump_images stored in {data_dir}")
|
click.echo(f"Running dump_images for listings stored in {engine.url}")
|
||||||
repository = ListingRepository(engine=engine)
|
repository = ListingRepository(engine=engine)
|
||||||
asyncio.run(dump_images_module.dump_images(repository, image_base_path=data_dir))
|
asyncio.run(dump_images_module.dump_images(repository, image_base_path=data_dir))
|
||||||
|
|
||||||
|
|
@ -174,9 +174,9 @@ def dump_images(ctx: click.core.Context):
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def detect_floorplan(ctx: click.core.Context):
|
def detect_floorplan(ctx: click.core.Context):
|
||||||
data_dir = ctx.obj["data_dir"]
|
data_dir = ctx.obj["data_dir"]
|
||||||
click.echo(f"Running detect_floorplan in {data_dir}")
|
click.echo(f"Running detect_floorplan for listings stored in {engine.url}")
|
||||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
repository = ListingRepository(engine=engine)
|
||||||
asyncio.run(detect_floorplan_module.detect_floorplan(listing_paths))
|
asyncio.run(detect_floorplan_module.detect_floorplan(repository))
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ def improve_img_for_ocr(img: Image):
|
||||||
return Image.fromarray(thresh)
|
return Image.fromarray(thresh)
|
||||||
|
|
||||||
|
|
||||||
def calculate_ocr(image_path):
|
def calculate_ocr(image_path) -> tuple[float | None, str]:
|
||||||
import pytesseract
|
import pytesseract
|
||||||
|
|
||||||
img = Image.open(image_path)
|
img = Image.open(image_path)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue