Merge the dump-listings and dump-details commands: fetch both listings and their details in a single command

This commit is contained in:
Viktor Barzin 2025-06-07 12:00:23 +00:00
parent 29213f3d26
commit 842f7cefbe
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
7 changed files with 54 additions and 59 deletions

View file

@ -11,14 +11,13 @@ from data_access import Listing
import csv_exporter
from rec.query import ListingType, FurnishType, QueryParameters
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
from repositories.listing_repositorty import ListingRepository
from repositories.listing_repository import ListingRepository
from ui_exporter import export_immoweb as export_immoweb_ui
from functools import wraps
from database import engine
dump_listings_module = importlib.import_module("1_dump_listings")
dump_detail_module = importlib.import_module("2_dump_detail")
dump_images_module = importlib.import_module("3_dump_images")
detect_floorplan_module = importlib.import_module("4_detect_floorplan")
routing_module = importlib.import_module("5_routing")
@ -157,21 +156,12 @@ def dump_listings(
)
data_dir_path = pathlib.Path(data_dir)
listings = asyncio.run(
dump_listings_module.dump_listings(query_parameters, data_dir_path)
dump_listings_module.dump_listings(query_parameters, engine, data_dir_path)
)
repository = ListingRepository(engine=engine)
asyncio.run(repository.upsert_listings(listings))
@cli.command()
@click.pass_context
def dump_details(ctx: click.core.Context):
data_dir = ctx.obj["data_dir"]
click.echo(f"Running dump_detail for listings stored in {data_dir}")
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
asyncio.run(dump_detail_module.dump_detail(listing_paths))
@cli.command()
@click.pass_context
def dump_images(ctx: click.core.Context):
@ -298,7 +288,7 @@ def export_csv(
)
output_file_path = pathlib.Path(output_file)
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
listings = Listing.get_all_listings([str(path) for path in listing_paths])
listings = Listing.get_all_listings([path for path in listing_paths])
asyncio.run(
csv_exporter.export_to_csv(
listings,
@ -365,7 +355,7 @@ def populate_db(
click.echo(f"Populating the database with data from {data_dir}")
repository = ListingRepository(engine=engine)
listings = Listing.get_all_listings(
[str(path) for path in pathlib.Path(data_dir).glob("*/listing.json")]
[path for path in pathlib.Path(data_dir).glob("*/listing.json")]
)
asyncio.run(repository.upsert_listings(listings))