Add runall script, update search parameters (up to 4 bedrooms, etc.), and allow incremental updating
This commit is contained in:
parent
dbf72e42e3
commit
4b6b8628c2
3 changed files with 20 additions and 3 deletions
|
|
@@ -14,12 +14,13 @@ for district, locid in districts.items():
|
||||||
d = listing_query(
|
d = listing_query(
|
||||||
page=i,
|
page=i,
|
||||||
min_bedrooms=1,
|
min_bedrooms=1,
|
||||||
max_bedrooms=3,
|
max_bedrooms=4,
|
||||||
radius=0,
|
radius=0,
|
||||||
min_price=0,
|
min_price=0,
|
||||||
max_price=800000,
|
max_price=1000000,
|
||||||
location_id=locid,
|
location_id=locid,
|
||||||
page_size=500,
|
page_size=500,
|
||||||
|
max_days_since_added=7,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
|
||||||
|
|
@@ -4,10 +4,16 @@ from tqdm import tqdm
|
||||||
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
incremental = True
|
||||||
|
|
||||||
|
|
||||||
listings = Listing.get_all_listings()
|
listings = Listing.get_all_listings()
|
||||||
filtered_listings = []
|
filtered_listings = []
|
||||||
for listing in listings:
|
for listing in listings:
|
||||||
if not listing.path_detail_json().exists():
|
if not incremental and not listing.isRemoved:
|
||||||
|
filtered_listings.append(listing)
|
||||||
|
|
||||||
|
if incremental and not listing.path_detail_json().exists():
|
||||||
filtered_listings.append(listing)
|
filtered_listings.append(listing)
|
||||||
|
|
||||||
for listing in tqdm(filtered_listings):
|
for listing in tqdm(filtered_listings):
|
||||||
|
|
|
||||||
10
crawler/runall.sh
Executable file
10
crawler/runall.sh
Executable file
|
|
@@ -0,0 +1,10 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -euxo pipefail
|
||||||
|
|
||||||
|
mkdir -p /tmp/re/
|
||||||
|
python 1_dump_listings.py | tee -a /tmp/re/1.log | tee -a /tmp/re/log.log
|
||||||
|
python 2_dump_detail.py | tee -a /tmp/re/2.log | tee -a /tmp/re/log.log
|
||||||
|
python 3_dump_images.py | tee -a /tmp/re/3.log | tee -a /tmp/re/log.log
|
||||||
|
python 4_detect_floorplan.py | tee -a /tmp/re/4.log | tee -a /tmp/re/log.log
|
||||||
|
python 5_routing.py | tee -a /tmp/re/5.log | tee -a /tmp/re/log.log
|
||||||
Loading…
Add table
Add a link
Reference in a new issue