Add a command to export the data in a format that the UI (immoweb) can consume

This commit is contained in:
Viktor Barzin 2025-05-26 19:36:54 +00:00
parent 102c20ac42
commit 7e8c79d3d1
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
4 changed files with 66 additions and 3 deletions

View file

@ -10,6 +10,7 @@ from data_access import Listing
import csv_exporter
from rec.query import ListingType, FurnishType
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
from ui_exporter import export_immoweb as export_immoweb_ui
dump_listings_module = importlib.import_module('1_dump_listings')
dump_detail_module = importlib.import_module('2_dump_detail')
@ -230,6 +231,25 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
asyncio.run(
csv_exporter.export_to_csv(listings, output_file_path,
list(columns)), )
@cli.command()
@click.option(
    '--output-file',
    '-O',
    help='Path to the output immoweb file',
    required=True,
    type=click.Path(
        writable=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
    ),
)
@click.pass_context
def export_immoweb(ctx, output_file: str):
    """CLI entry point: export the crawled listings to *output_file* in the
    format the immoweb UI consumes (delegates to ``ui_exporter``)."""
    click.echo(f'Exporting data to {output_file}')
    export_coro = export_immoweb_ui(ctx, output_file)
    asyncio.run(export_coro)
if __name__ == '__main__':

View file

@ -71,7 +71,7 @@ async def listing_query(
) -> dict[str, Any]:
params: dict[str, str] = {
"locationIdentifier": location_id,
"channel": channel.upper(),
"channel": str(channel).upper(),
"page": str(page),
"numberOfPropertiesPerPage": str(page_size),
"radius": str(radius),

View file

@ -4,10 +4,10 @@ set -euxo pipefail
DATA_DIR="data/rs"
# Stage 1: dump raw search results (two passes with different price/district filters).
python main.py --data-dir $DATA_DIR dump-listings --min-price 2500 --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
python main.py --data-dir $DATA_DIR dump-listings --min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent
# Stage 2: fetch per-listing detail pages and images, then flag floorplan images.
python main.py --data-dir $DATA_DIR dump-details
python main.py --data-dir $DATA_DIR dump-images
python main.py --data-dir $DATA_DIR detect-floorplan
#python main.py --data-dir $DATA_DIR routing --destination-address 'Meta Brock Street' -m transit # NOTE: THIS CONSUMES API CALLS; USE CAREFULLY; add -l to limit number of entries
# Stage 3: export — CSV for spreadsheets, JS/GeoJSON for the immoweb UI.
python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv
python main.py --data-dir $DATA_DIR export-immoweb -O ../immoweb/data/london_geojs.js

43
crawler/ui_exporter.py Normal file
View file

@ -0,0 +1,43 @@
import json
import pathlib
from data_access import Listing
async def export_immoweb(ctx, output_file: str):
    """Export all crawled listings as a GeoJSON FeatureCollection wrapped in
    a JavaScript assignment (``var data = ...``) so the immoweb UI can load
    the file with a plain ``<script>`` tag.

    Args:
        ctx: click context; ``ctx.obj['data_dir']`` is the crawler data
            directory containing ``<id>/listing.json`` files.
        output_file: path of the JS file to write (overwritten if present).
    """
    data_dir = ctx.obj['data_dir']
    output_file_path = pathlib.Path(output_file)

    # glob() returns an iterator; sorted() consumes it directly — no
    # intermediate list() needed.
    listing_paths = sorted(pathlib.Path(data_dir).glob("*/listing.json"))
    listings = Listing.get_all_listings([str(path) for path in listing_paths])

    # Convert listings to the GeoJSON Feature format the UI expects.
    immoweb_listings = []
    for listing in listings:
        immoweb_listing = {
            'type': 'Feature',
            'properties': {
                # TODO: derive city from the listing instead of hard-coding.
                'city': 'London',
                'country': 'United Kingdom',
                'qm': await listing.sqm_ocr(),
                'qmprice': await listing.price_per_sqm(),
                'rooms': listing.bedrooms,
                'total_price': listing.price,
            },
            'geometry': {
                # GeoJSON ordering is [longitude, latitude].
                'coordinates': [
                    listing.longitude,
                    listing.latitude,
                ],
                'type': 'Point',
            }
        }
        immoweb_listings.append(immoweb_listing)

    # The "var data = " prefix makes the file directly loadable as a JS global.
    prefix = 'var data = '
    serialized_data = {"type": "FeatureCollection", "features": immoweb_listings}
    result = prefix + json.dumps(serialized_data, indent=4)
    # Explicit encoding so output is stable regardless of the host locale.
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(result)