From 7e8c79d3d197804f8e66f5c90ec8a81380f7238e Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Mon, 26 May 2025 19:36:54 +0000
Subject: [PATCH] add command to export the data in a way that the ui (immoweb)
 can consume

---
 crawler/main.py        | 20 ++++++++++++++++++++
 crawler/rec/query.py   |  2 +-
 crawler/runall.sh      |  4 ++--
 crawler/ui_exporter.py | 43 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 crawler/ui_exporter.py

diff --git a/crawler/main.py b/crawler/main.py
index 7a24d83..ef0dcfd 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -10,6 +10,7 @@ from data_access import Listing
 import csv_exporter
 from rec.query import ListingType, FurnishType
 from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
+from ui_exporter import export_immoweb as export_immoweb_ui
 
 dump_listings_module = importlib.import_module('1_dump_listings')
 dump_detail_module = importlib.import_module('2_dump_detail')
@@ -230,6 +231,25 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
     asyncio.run(
         csv_exporter.export_to_csv(listings, output_file_path,
                                    list(columns)), )
+                                
+@cli.command()
+@click.option(
+    '--output-file',
+    '-O',
+    help='Path to the output immoweb file',
+    required=True,
+    type=click.Path(
+        writable=True,
+        file_okay=True,
+        dir_okay=False,
+        resolve_path=True,
+    ),
+)
+@click.pass_context
+def export_immoweb(ctx: click.core.Context, output_file: str):
+    """Export all listings as a GeoJSON JavaScript file for the immoweb UI."""
+    click.echo(f'Exporting data to {output_file}')
+    asyncio.run(export_immoweb_ui(ctx, output_file))
 
 
 if __name__ == '__main__':
diff --git a/crawler/rec/query.py b/crawler/rec/query.py
index ce0a2bc..47f4fd7 100644
--- a/crawler/rec/query.py
+++ b/crawler/rec/query.py
@@ -71,7 +71,7 @@ async def listing_query(
 ) -> dict[str, Any]:
     params: dict[str, str] = {
         "locationIdentifier": location_id,
-        "channel": channel.upper(),
+        "channel": str(channel).upper(),
         "page": str(page),
         "numberOfPropertiesPerPage": str(page_size),
         "radius": str(radius),
diff --git a/crawler/runall.sh b/crawler/runall.sh
index d372983..c483545 100755
--- a/crawler/runall.sh
+++ b/crawler/runall.sh
@@ -4,10 +4,10 @@ set -euxo pipefail
 
 DATA_DIR="data/rs"
 
-python main.py --data-dir $DATA_DIR dump-listings --min-price 2500 --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
+python main.py --data-dir $DATA_DIR dump-listings --min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent
 python main.py --data-dir $DATA_DIR dump-details 
 python main.py --data-dir $DATA_DIR dump-images 
 python main.py --data-dir $DATA_DIR detect-floorplan
 #python main.py --data-dir $DATA_DIR routing --destination-address 'Meta Brock Street' -m transit # NOTE: THIS CONSUMES API CALLS; USE CAREFULLY; add -l to limit number of entries
 python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv
-
+python main.py --data-dir $DATA_DIR export-immoweb -O ../immoweb/data/london_geojs.js
diff --git a/crawler/ui_exporter.py b/crawler/ui_exporter.py
new file mode 100644
index 0000000..24496ff
--- /dev/null
+++ b/crawler/ui_exporter.py
@@ -0,0 +1,43 @@
+
+import json
+import pathlib
+
+from data_access import Listing
+
+
+async def export_immoweb(ctx, output_file: str, *, city: str = 'London',
+                         country: str = 'United Kingdom') -> None:
+    """Export all listings as a GeoJSON FeatureCollection for the immoweb UI.
+
+    Listings are loaded from every ``*/listing.json`` below
+    ``ctx.obj['data_dir']``. The result is written to ``output_file`` as a
+    JavaScript assignment (``var data = {...}``), not as bare JSON. ``city``
+    and ``country`` are copied into the properties of every feature.
+    """
+    data_dir = pathlib.Path(ctx.obj['data_dir'])
+    listing_paths = sorted(data_dir.glob('*/listing.json'))
+    listings = Listing.get_all_listings([str(path) for path in listing_paths])
+
+    features = []
+    for listing in listings:
+        features.append({
+            'type': 'Feature',
+            'properties': {
+                'city': city,
+                'country': country,
+                'qm': await listing.sqm_ocr(),
+                'qmprice': await listing.price_per_sqm(),
+                'rooms': listing.bedrooms,
+                'total_price': listing.price,
+            },
+            'geometry': {
+                # GeoJSON positions are ordered [longitude, latitude].
+                'coordinates': [listing.longitude, listing.latitude],
+                'type': 'Point',
+            },
+        })
+
+    collection = {'type': 'FeatureCollection', 'features': features}
+    result = 'var data = ' + json.dumps(collection, indent=4)
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(result)