From 0bdfeec1953b119cb97c0a09add9e8bae18530b9 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 1 Jun 2025 20:11:00 +0000
Subject: [PATCH] make the csv exporter use the filtering params to allow
 exporting customizable reports

---
Review notes (free-text area after the separator; ignored by `git am`):

* What the patch does: `export_to_csv` gains an optional, defaulted
  `query_parameters` argument. When it is not None the listings are first
  passed through `await filter_listings(listings, query_parameters)`; when
  it is None the export proceeds exactly as before. `export_csv` is
  additionally decorated with `listing_filter_options`, builds a
  `QueryParameters` from the received CLI values and forwards it.

* Changed during review: the added `export_csv` signature now annotates
  `columns` as `tuple[str, ...]` instead of `tuple[str]`. `tuple[str]`
  denotes a tuple of exactly one string, while the body converts the value
  with `list(columns)`, i.e. treats it as arbitrary-length. This is an
  annotation-only change. The post-image blob id on the `index` line of
  crawler/main.py was recorded before this tweak.

* The line structure of the patch was restored; the line counts match the
  hunk headers and the diffstat below.

* To confirm: this patch adds no imports to crawler/main.py, so
  `QueryParameters`, `ListingType`, `FurnishType`, `datetime` and
  `listing_filter_options` must already be in scope there.

* To confirm: the parameter `type` shadows the builtin inside `export_csv`.
  Presumably the name is dictated by an option declared in
  `listing_filter_options` (not visible in this patch); verify before
  renaming.

* To confirm: `ListingType[type]` and `FurnishType[furnish_type]` are
  subscript lookups. If these are Enum classes, an unknown member name
  raises KeyError, so the CLI options should restrict the accepted values.

 crawler/csv_exporter.py |  4 ++++
 crawler/main.py         | 40 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/crawler/csv_exporter.py b/crawler/csv_exporter.py
index a8b364c..0428f3b 100644
--- a/crawler/csv_exporter.py
+++ b/crawler/csv_exporter.py
@@ -2,13 +2,17 @@ import asyncio
 from pathlib import Path
 from data_access import Listing
 import pandas as pd
+from rec.query import QueryParameters, filter_listings
 
 
 async def export_to_csv(
     listings: list[Listing],
     output_file: Path,
     columns: list[str],
+    query_parameters: QueryParameters | None = None,
 ) -> None:
+    if query_parameters is not None:
+        listings = await filter_listings(listings, query_parameters)
     ds = await asyncio.gather(*[listing.dict_nicely() for listing in listings])
     df = pd.DataFrame(ds)
     # read decisions on file
diff --git a/crawler/main.py b/crawler/main.py
index 4d8f697..e9b4dce 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -258,14 +258,48 @@ def routing(
     ),
 )
 @click.pass_context
-def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
+@listing_filter_options
+def export_csv(
+    ctx: click.core.Context,
+    output_file: str,
+    columns: tuple[str, ...],
+    district: list[str],
+    min_bedrooms: int,
+    max_bedrooms: int,
+    min_price: int,
+    max_price: int,
+    type: str,
+    furnish_types: list[str],
+    available_from: datetime | None,
+    last_seen_days: int,
+    min_sqm: int | None = None,
+):
     data_dir = ctx.obj["data_dir"]
-    click.echo(f"Exporting data to {output_file} using {data_dir=}")
+    query_parameters = QueryParameters(
+        listing_type=ListingType[type],
+        district_names=set(district),
+        min_bedrooms=min_bedrooms,
+        max_bedrooms=max_bedrooms,
+        min_price=min_price,
+        max_price=max_price,
+        furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
+        let_date_available_from=available_from,
+        last_seen_days=last_seen_days,
+        min_sqm=min_sqm,
+    )
+    click.echo(
+        f"Exporting data to {output_file} using {data_dir=} and query parameters: {query_parameters}"
+    )
     output_file_path = pathlib.Path(output_file)
     listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
     listings = Listing.get_all_listings([str(path) for path in listing_paths])
     asyncio.run(
-        csv_exporter.export_to_csv(listings, output_file_path, list(columns)),
+        csv_exporter.export_to_csv(
+            listings,
+            output_file_path,
+            list(columns),
+            query_parameters=query_parameters,
+        ),
     )
 
 