diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py
index ecbbe2c..a659235 100644
--- a/crawler/3_dump_images.py
+++ b/crawler/3_dump_images.py
@@ -2,7 +2,7 @@ import asyncio
 import json
 import pathlib
 import aiohttp
-from tqdm import tqdm
+from tqdm.asyncio import tqdm
 from data_access import Listing
 
 # Setting this too high either crashes rightmove or gets us blocked
diff --git a/crawler/runall.sh b/crawler/runall.sh
index f5311cc..ec02d0d 100755
--- a/crawler/runall.sh
+++ b/crawler/runall.sh
@@ -2,9 +2,12 @@
 
 set -euxo pipefail
 
-mkdir -p /tmp/re/
-python 1_dump_listings.py | tee -a /tmp/re/1.log | tee -a /tmp/re/log.log
-python 2_dump_detail.py | tee -a /tmp/re/2.log | tee -a /tmp/re/log.log
-python 3_dump_images.py | tee -a /tmp/re/3.log | tee -a /tmp/re/log.log
-python 4_detect_floorplan.py | tee -a /tmp/re/4.log | tee -a /tmp/re/log.log
-python 5_routing.py | tee -a /tmp/re/5.log | tee -a /tmp/re/log.log
+DATA_DIR="data/rs"
+
+python main.py --data-dir $DATA_DIR dump-listings --max-price 3500 --min-bedrooms 2 --max-bedrooms 4 --district islington -t rent
+python main.py --data-dir $DATA_DIR dump-details
+python main.py --data-dir $DATA_DIR dump-images
+python main.py --data-dir $DATA_DIR detect-floorplan
+#python 5_routing.py | tee -a /tmp/re/5.log | tee -a /tmp/re/log.log
+python main.py --data-dir $DATA_DIR export-csv -O data/listings.csv
+