diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py
index 43ac753..537c997 100644
--- a/crawler/1_dump_listings.py
+++ b/crawler/1_dump_listings.py
@@ -1,4 +1,5 @@
 import asyncio
+import importlib
 import json
 import pathlib
 from typing import Any
@@ -9,6 +10,44 @@ from tqdm.asyncio import tqdm
 from data_access import Listing
 from models import Listing as modelListing
 
+# The step modules' names start with a digit, so a plain `import` statement
+# cannot name them; load them through importlib instead.
+dump_images_module = importlib.import_module("3_dump_images")
+detect_floorplan_module = importlib.import_module("4_detect_floorplan")
+
+
+async def dump_listings_full(
+    parameters: QueryParameters,
+    repository: ListingRepository,
+    data_dir: pathlib.Path = pathlib.Path("data/rs/"),
+) -> list[modelListing]:
+    """Dump listings, fetch their images, and run floorplan detection.
+
+    Runs ``dump_listings``, then ``dump_images`` and ``detect_floorplan`` from
+    the step modules, and finally re-reads the listings from the repository.
+
+    Args:
+        parameters: Query passed to ``dump_listings`` and ``get_listings``.
+        repository: Repository handed to every step and re-queried at the end.
+        data_dir: Directory passed to ``dump_listings`` and, as
+            ``image_base_path``, to ``dump_images``.
+
+    Returns:
+        The re-read listings whose ``id`` matches an entry returned by
+        ``dump_listings``.
+    """
+    new_listings = await dump_listings(parameters, repository, data_dir)
+    await dump_images_module.dump_images(repository, image_base_path=data_dir)
+    await detect_floorplan_module.detect_floorplan(repository)
+    # Normalise to a set of ids: entries may be listing objects or bare ids,
+    # and testing ``listing.id`` against the objects would never match.
+    # NOTE(review): confirm what dump_listings actually returns.
+    new_ids = {getattr(entry, "id", entry) for entry in new_listings}
+    # Refresh: re-read the listings now that the image and floorplan steps ran.
+    # TODO: query only the new ids instead of repeating the whole search.
+    listings = await repository.get_listings(parameters)
+    return [listing for listing in listings if listing.id in new_ids]
+
 
 async def dump_listings(
     parameters: QueryParameters,