Add a command to export the data in a format that the UI (immoweb) can consume
This commit is contained in:
parent
102c20ac42
commit
7e8c79d3d1
4 changed files with 66 additions and 3 deletions
|
|
@ -10,6 +10,7 @@ from data_access import Listing
|
||||||
import csv_exporter
|
import csv_exporter
|
||||||
from rec.query import ListingType, FurnishType
|
from rec.query import ListingType, FurnishType
|
||||||
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
||||||
|
from ui_exporter import export_immoweb as export_immoweb_ui
|
||||||
|
|
||||||
dump_listings_module = importlib.import_module('1_dump_listings')
|
dump_listings_module = importlib.import_module('1_dump_listings')
|
||||||
dump_detail_module = importlib.import_module('2_dump_detail')
|
dump_detail_module = importlib.import_module('2_dump_detail')
|
||||||
|
|
@ -231,6 +232,25 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
|
||||||
csv_exporter.export_to_csv(listings, output_file_path,
|
csv_exporter.export_to_csv(listings, output_file_path,
|
||||||
list(columns)), )
|
list(columns)), )
|
||||||
|
|
||||||
|
# CLI command `export-immoweb`: writes the crawled listings to the file
# consumed by the immoweb UI. The actual conversion lives in
# `ui_exporter.export_immoweb` (imported here as `export_immoweb_ui`).
@cli.command()
@click.option(
    '--output-file',
    '-O',
    type=click.Path(
        file_okay=True,
        dir_okay=False,
        writable=True,
        resolve_path=True,
    ),
    required=True,
    help='Path to the output immoweb file',
)
@click.pass_context
def export_immoweb(ctx, output_file: str):
    # Intentionally documented with comments only: click would surface a
    # docstring as this command's help text and change the `--help` output.
    click.echo(f'Exporting data to {output_file}')

    # The exporter is a coroutine function, so build the coroutine and run
    # it to completion from this synchronous command.
    export_coroutine = export_immoweb_ui(ctx, output_file)
    asyncio.run(export_coroutine)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
cli()
|
cli()
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,7 @@ async def listing_query(
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
params: dict[str, str] = {
|
params: dict[str, str] = {
|
||||||
"locationIdentifier": location_id,
|
"locationIdentifier": location_id,
|
||||||
"channel": channel.upper(),
|
"channel": str(channel).upper(),
|
||||||
"page": str(page),
|
"page": str(page),
|
||||||
"numberOfPropertiesPerPage": str(page_size),
|
"numberOfPropertiesPerPage": str(page_size),
|
||||||
"radius": str(radius),
|
"radius": str(radius),
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,10 @@ set -euxo pipefail
|
||||||
|
|
||||||
DATA_DIR="data/rs"
|
DATA_DIR="data/rs"
|
||||||
|
|
||||||
python main.py --data-dir $DATA_DIR dump-listings --min-price 2500 --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
|
python main.py --data-dir $DATA_DIR dump-listings --min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent
|
||||||
python main.py --data-dir $DATA_DIR dump-details
|
python main.py --data-dir $DATA_DIR dump-details
|
||||||
python main.py --data-dir $DATA_DIR dump-images
|
python main.py --data-dir $DATA_DIR dump-images
|
||||||
python main.py --data-dir $DATA_DIR detect-floorplan
|
python main.py --data-dir $DATA_DIR detect-floorplan
|
||||||
#python main.py --data-dir $DATA_DIR routing --destination-address 'Meta Brock Street' -m transit # NOTE: THIS CONSUMES API CALLS; USE CAREFULLY; add -l to limit number of entries
|
#python main.py --data-dir $DATA_DIR routing --destination-address 'Meta Brock Street' -m transit # NOTE: THIS CONSUMES API CALLS; USE CAREFULLY; add -l to limit number of entries
|
||||||
python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv
|
python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv
|
||||||
|
python main.py --data-dir $DATA_DIR export-immoweb -O ../immoweb/data/london_geojs.js
|
||||||
|
|
|
||||||
43
crawler/ui_exporter.py
Normal file
43
crawler/ui_exporter.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
|
async def export_immoweb(
    ctx,
    output_file: str,
    *,
    city: str = 'London',
    country: str = 'United Kingdom',
) -> None:
    """Export all dumped listings as a JavaScript data file for the UI.

    Every ``*/listing.json`` found directly below the data directory is
    loaded and converted into a GeoJSON-style ``Feature`` with a ``Point``
    geometry. The features are wrapped in a ``FeatureCollection`` and
    written as ``var data = <json>`` so the file can be loaded as a script.

    Args:
        ctx: Click context; ``ctx.obj['data_dir']`` is the directory that
            holds one sub-directory per listing.
        output_file: Path of the file to write. An existing file is
            overwritten.
        city: Value stored in each feature's ``city`` property.
        country: Value stored in each feature's ``country`` property.
    """
    data_dir = pathlib.Path(ctx.obj['data_dir'])
    # Sort the paths so the output order is stable between runs.
    listing_paths = sorted(data_dir.glob('*/listing.json'))
    listings = Listing.get_all_listings([str(path) for path in listing_paths])

    # Convert listings to immoweb format.
    features = []
    for listing in listings:
        # NOTE(review): `qm` is presumably the floor area in square metres
        # as returned by `sqm_ocr()` - confirm against `Listing`.
        floor_area = await listing.sqm_ocr()
        price_per_area = await listing.price_per_sqm()
        features.append({
            'type': 'Feature',
            'properties': {
                'city': city,
                'country': country,
                'qm': floor_area,
                'qmprice': price_per_area,
                'rooms': listing.bedrooms,
                'total_price': listing.price,
            },
            'geometry': {
                # Longitude comes first, then latitude.
                'coordinates': [
                    listing.longitude,
                    listing.latitude,
                ],
                'type': 'Point',
            },
        })

    feature_collection = {'type': 'FeatureCollection', 'features': features}
    # The `var data = ` prefix turns the JSON document into a script that
    # defines a global variable.
    payload = 'var data = ' + json.dumps(feature_collection, indent=4)
    # Explicit encoding keeps the output independent of the platform locale.
    pathlib.Path(output_file).write_text(payload, encoding='utf-8')
|
||||||
Loading…
Add table
Add a link
Reference in a new issue