Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/

The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
2026-02-07 23:01:20 +00:00 · 2026-02-07 23:01:20 +00:00 · eafbc1ac52
commit eafbc1ac52
parent e2247be700
221 changed files with 70 additions and 146140 deletions
--- a/csv_exporter.py
+++ b/csv_exporter.py
@ -0,0 +1,40 @@
+from pathlib import Path
+import pandas as pd
+from models.listing import QueryParameters
+from repositories.listing_repository import ListingRepository
+
+
+async def export_to_csv(
+    repository: ListingRepository,
+    output_file: Path,
+    query_parameters: QueryParameters | None = None,
+) -> None:
+    """Export the repository's listings to a CSV file sorted by price per sqm.
+
+    Args:
+        repository: Source of listings; its ``get_listings`` coroutine is
+            awaited with ``query_parameters``.
+        output_file: Destination path of the CSV file.
+        query_parameters: Optional filter forwarded unchanged to
+            ``repository.get_listings``.
+
+    Raises:
+        Whatever ``pd.read_json`` raises when ``data/decisions.json`` cannot
+        be read or parsed (only attempted when there is at least one listing).
+    """
+    listings = await repository.get_listings(query_parameters=query_parameters)
+    df = pd.DataFrame([vars(listing) for listing in listings])
+
+    # With no listings the frame has no columns at all, so the "id" lookup
+    # below would fail; write a CSV without data rows and stop instead.
+    if df.empty:
+        df.to_csv(str(output_file), index=False)
+        return
+
+    # Attach the recorded decision for each listing id.
+    # NOTE(review): pd.read_json returns a DataFrame by default, so .get(x)
+    # looks up a *column* labelled x (None when absent) -- confirm this matches
+    # the layout of decisions.json.
+    decisions = pd.read_json("data/decisions.json")
+    df.loc[:, "decision"] = df["id"].apply(decisions.get)
+
+    # Drop columns that must not be exported; tolerate either one being absent
+    # instead of raising KeyError.
+    df = df.drop(
+        columns=["_sa_instance_state", "additional_info"], errors="ignore"
+    )
+
+    # Fill gaps with the -1 sentinel so Excel filters see a value in every row.
+    for column in ("service_charge", "lease_left", "square_meters"):
+        if column not in df.columns:
+            df.loc[:, column] = -1
+        df.loc[:, column] = df[column].fillna(-1)
+
+    # Price per sqm is only meaningful for a positive area: the -1 sentinel
+    # would give a negative value and 0 would give inf, so leave those empty.
+    price_per_sqm = df["price"] / df["square_meters"]
+    df.loc[:, "price_per_sqm"] = price_per_sqm.where(df["square_meters"] > 0)
+
+    # sort_values places missing values last by default, so listings without a
+    # usable area end up at the bottom of the export.
+    df = df.sort_values(by=["price_per_sqm"], ascending=True)
+    df.to_csv(str(output_file), index=False)