add method to dump all data for listings e2e

This commit is contained in:
Viktor Barzin 2025-06-21 12:04:31 +00:00
parent 3e1be6750c
commit ec2e0dded8
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863

View file

@ -1,4 +1,5 @@
import asyncio
import importlib
import json
import pathlib
from typing import Any
@ -9,6 +10,24 @@ from tqdm.asyncio import tqdm
from data_access import Listing
from models import Listing as modelListing
# These module names start with a digit, so they are not valid identifiers and
# cannot be loaded with a plain `import` statement; importlib is used instead.
dump_images_module = importlib.import_module("3_dump_images")
detect_floorplan_module = importlib.import_module("4_detect_floorplan")
async def dump_listings_full(
    parameters: QueryParameters,
    repository: ListingRepository,
    data_dir: pathlib.Path = pathlib.Path("data/rs/"),
) -> list[modelListing]:
    """Fetch listings, dump their images and run floorplan detection.

    The steps run sequentially: ``dump_listings``, then
    ``dump_images_module.dump_images``, then
    ``detect_floorplan_module.detect_floorplan``. Afterwards the listings are
    re-read from ``repository`` and narrowed to the batch that
    ``dump_listings`` reported.

    Args:
        parameters: Query forwarded to ``dump_listings`` and to
            ``repository.get_listings``.
        repository: Listing repository handed to every step.
        data_dir: Directory forwarded to ``dump_listings`` and used as
            ``image_base_path`` for the image dump.

    Returns:
        The re-read listings whose ``id`` belongs to the batch returned by
        ``dump_listings``.
    """
    dumped = await dump_listings(parameters, repository, data_dir)
    await dump_images_module.dump_images(repository, image_base_path=data_dir)
    await detect_floorplan_module.detect_floorplan(repository)

    # Refresh listings by re-reading them from the repository.
    # TODO: this can be better -- it re-runs the whole query just to pick up
    # the rows of the batch dumped above.
    refreshed = await repository.get_listings(parameters)

    # NOTE(review): the return type of dump_listings is not visible from here.
    # Comparing `listing.id` directly against a list of listing objects would
    # never match, so normalise the batch to ids: objects contribute their
    # `.id`, bare ids are used unchanged. Confirm which one dump_listings
    # actually returns. A set also makes each membership test O(1).
    new_ids = {getattr(item, "id", item) for item in dumped}
    return [listing for listing in refreshed if listing.id in new_ids]
async def dump_listings(
parameters: QueryParameters,