Add runall script, update search parameters (up to 4 bedrooms, max price 1,000,000), and allow incremental updating
This commit is contained in:
parent
dbf72e42e3
commit
4b6b8628c2
3 changed files with 20 additions and 3 deletions
|
|
@ -14,12 +14,13 @@ for district, locid in districts.items():
|
|||
d = listing_query(
|
||||
page=i,
|
||||
min_bedrooms=1,
|
||||
max_bedrooms=3,
|
||||
max_bedrooms=4,
|
||||
radius=0,
|
||||
min_price=0,
|
||||
max_price=800000,
|
||||
max_price=1000000,
|
||||
location_id=locid,
|
||||
page_size=500,
|
||||
max_days_since_added=7,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
|
|||
|
|
@ -4,10 +4,16 @@ from tqdm import tqdm
|
|||
|
||||
from data_access import Listing
|
||||
|
||||
incremental = True
|
||||
|
||||
|
||||
listings = Listing.get_all_listings()
|
||||
filtered_listings = []
|
||||
for listing in listings:
|
||||
if not listing.path_detail_json().exists():
|
||||
if not incremental and not listing.isRemoved:
|
||||
filtered_listings.append(listing)
|
||||
|
||||
if incremental and not listing.path_detail_json().exists():
|
||||
filtered_listings.append(listing)
|
||||
|
||||
for listing in tqdm(filtered_listings):
|
||||
|
|
|
|||
10
crawler/runall.sh
Executable file
10
crawler/runall.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash
# Run the five numbered pipeline stages in order. Each stage's stdout is
# echoed to the terminal and appended to both a per-stage log
# (/tmp/re/<n>.log) and a combined log (/tmp/re/log.log).

# -e: stop at the first failing command; -u: unset variables are errors;
# -x: trace commands as they run; pipefail: a failing python stage fails its
# whole pipeline even though tee exits successfully.
set -euxo pipefail

log_dir=/tmp/re
mkdir -p "${log_dir}/"

stages=(
    1_dump_listings.py
    2_dump_detail.py
    3_dump_images.py
    4_detect_floorplan.py
    5_routing.py
)

for stage in "${stages[@]}"; do
    # The per-stage log is named after the script's numeric prefix.
    stage_no="${stage%%_*}"
    # A single tee appends to both files and still passes output through.
    python "${stage}" | tee -a "${log_dir}/${stage_no}.log" "${log_dir}/log.log"
done
|
||||
Loading…
Add table
Add a link
Reference in a new issue