From 47347543d26a4d8bb3f69fef86e84cad91e6d919 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 17 May 2025 23:14:00 +0000 Subject: [PATCH] update runall script to use the click entrypoint --- crawler/3_dump_images.py | 2 +- crawler/runall.sh | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py index ecbbe2c..a659235 100644 --- a/crawler/3_dump_images.py +++ b/crawler/3_dump_images.py @@ -2,7 +2,7 @@ import asyncio import json import pathlib import aiohttp -from tqdm import tqdm +from tqdm.asyncio import tqdm from data_access import Listing # Setting this too high either crashes rightmove or gets us blocked diff --git a/crawler/runall.sh b/crawler/runall.sh index f5311cc..ec02d0d 100755 --- a/crawler/runall.sh +++ b/crawler/runall.sh @@ -2,9 +2,12 @@ set -euxo pipefail -mkdir -p /tmp/re/ -python 1_dump_listings.py | tee -a /tmp/re/1.log | tee -a /tmp/re/log.log -python 2_dump_detail.py | tee -a /tmp/re/2.log | tee -a /tmp/re/log.log -python 3_dump_images.py | tee -a /tmp/re/3.log | tee -a /tmp/re/log.log -python 4_detect_floorplan.py | tee -a /tmp/re/4.log | tee -a /tmp/re/log.log -python 5_routing.py | tee -a /tmp/re/5.log | tee -a /tmp/re/log.log +DATA_DIR="data/rs" + +python main.py --data-dir $DATA_DIR dump-listings --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent +python main.py --data-dir $DATA_DIR dump-details +python main.py --data-dir $DATA_DIR dump-images +python main.py --data-dir $DATA_DIR detect-floorplan +#python 5_routing.py | tee -a /tmp/re/5.log | tee -a /tmp/re/log.log +python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv +