Add a command to export the data in a format that the UI (immoweb) can consume
This commit is contained in:
parent
102c20ac42
commit
7e8c79d3d1
4 changed files with 66 additions and 3 deletions
|
|
@ -10,6 +10,7 @@ from data_access import Listing
|
|||
import csv_exporter
|
||||
from rec.query import ListingType, FurnishType
|
||||
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
||||
from ui_exporter import export_immoweb as export_immoweb_ui
|
||||
|
||||
dump_listings_module = importlib.import_module('1_dump_listings')
|
||||
dump_detail_module = importlib.import_module('2_dump_detail')
|
||||
|
|
@ -230,6 +231,25 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
|
|||
asyncio.run(
|
||||
csv_exporter.export_to_csv(listings, output_file_path,
|
||||
list(columns)), )
|
||||
|
||||
@cli.command()
@click.option(
    '--output-file',
    '-O',
    help='Path to the output immoweb file',
    required=True,
    type=click.Path(
        writable=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
    ),
)
@click.pass_context
def export_immoweb(ctx: click.core.Context, output_file: str):
    """Export the dumped listings to a data file for the immoweb UI.

    Args:
        ctx: Click context, forwarded unchanged to the exporter.
        output_file: Path of the file to write, already resolved by click
            (``resolve_path=True``).
    """
    click.echo(f'Exporting data to {output_file}')
    # The exporter is a coroutine, so it is run to completion on a fresh
    # event loop here.
    asyncio.run(export_immoweb_ui(ctx, output_file))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ async def listing_query(
|
|||
) -> dict[str, Any]:
|
||||
params: dict[str, str] = {
|
||||
"locationIdentifier": location_id,
|
||||
"channel": channel.upper(),
|
||||
"channel": str(channel).upper(),
|
||||
"page": str(page),
|
||||
"numberOfPropertiesPerPage": str(page_size),
|
||||
"radius": str(radius),
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ set -euxo pipefail
|
|||
|
||||
DATA_DIR="data/rs"
|
||||
|
||||
python main.py --data-dir $DATA_DIR dump-listings --min-price 2500 --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
|
||||
python main.py --data-dir $DATA_DIR dump-listings --min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent
|
||||
python main.py --data-dir $DATA_DIR dump-details
|
||||
python main.py --data-dir $DATA_DIR dump-images
|
||||
python main.py --data-dir $DATA_DIR detect-floorplan
|
||||
#python main.py --data-dir $DATA_DIR routing --destination-address 'Meta Brock Street' -m transit # NOTE: THIS CONSUMES API CALLS; USE CAREFULLY; add -l to limit number of entries
|
||||
python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv
|
||||
|
||||
python main.py --data-dir $DATA_DIR export-immoweb -O ../immoweb/data/london_geojs.js
|
||||
|
|
|
|||
43
crawler/ui_exporter.py
Normal file
43
crawler/ui_exporter.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
from data_access import Listing
|
||||
|
||||
|
||||
async def export_immoweb(ctx, output_file: str):
    """Write all dumped listings as a GeoJSON-style FeatureCollection.

    Every ``*/listing.json`` found directly below the data directory is
    loaded through ``Listing.get_all_listings`` and converted into one
    ``Feature`` with a ``Point`` geometry. The serialized collection is
    prefixed with ``var data = `` and written to ``output_file``.

    Args:
        ctx: Context object whose ``obj['data_dir']`` entry names the
            directory that contains the per-listing sub-directories.
        output_file: Path of the file to (over)write.
    """
    data_dir = pathlib.Path(ctx.obj['data_dir'])
    output_file_path = pathlib.Path(output_file)

    # Sort the paths so they are handed to the loader in a deterministic
    # order; sorted() already returns a list, no intermediate list() needed.
    listing_paths = sorted(data_dir.glob("*/listing.json"))
    listings = Listing.get_all_listings([str(path) for path in listing_paths])

    # Convert listings to immoweb format
    features = []
    for listing in listings:
        features.append({
            'type': 'Feature',
            'properties': {
                'city': 'London',  # TODO: hard-coded, change me
                'country': 'United Kingdom',
                'qm': await listing.sqm_ocr(),
                'qmprice': await listing.price_per_sqm(),
                'rooms': listing.bedrooms,
                'total_price': listing.price,
            },
            'geometry': {
                # Longitude first, then latitude.
                'coordinates': [
                    listing.longitude,
                    listing.latitude,
                ],
                'type': 'Point',
            },
        })

    feature_collection = {"type": "FeatureCollection", "features": features}
    # The payload is emitted as a variable assignment rather than bare JSON;
    # presumably the UI loads the file as a script -- confirm against the UI.
    result = 'var data = ' + json.dumps(feature_collection, indent=4)

    # Explicit encoding so the output does not depend on the platform locale.
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(result)
|
||||
Loading…
Add table
Add a link
Reference in a new issue