add uvicorn logger when dumping listings
parent 24b41ae2ed
commit 3d024277fb

1 changed file with 12 additions and 2 deletions
@@ -16,7 +16,7 @@ from models import Listing as modelListing
 dump_images_module = importlib.import_module("3_dump_images")
 detect_floorplan_module = importlib.import_module("4_detect_floorplan")

-logger = logging.getLogger("uvicorn")
+logger = logging.getLogger("uvicorn.error")


 async def dump_listings_full(
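The renamed logger is the point of this commit: uvicorn's default logging config wires up the "uvicorn", "uvicorn.error", and "uvicorn.access" loggers, so records sent to "uvicorn.error" come out through the server's own handlers and formatting. A minimal sketch of the pattern (not the project's code; assumes the app runs under uvicorn with --log-level debug so the new debug() calls are actually emitted):

import logging

# Reuse uvicorn's error logger so application messages share the server's
# handlers; plain "uvicorn" also works, but "uvicorn.error" is the
# conventional channel for application output.
logger = logging.getLogger("uvicorn.error")

if __name__ == "__main__":
    # Outside uvicorn no handlers are configured yet, so attach one for the demo.
    logging.basicConfig(level=logging.DEBUG)
    logger.debug("Upserted 3 new listings")  # mirrors the commit's new logs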
@@ -26,8 +26,13 @@ async def dump_listings_full(
 ) -> list[modelListing]:
     """Fetches all listings, images as well as detects floorplans"""
     new_listings = await dump_listings(parameters, repository, data_dir)
+    logger.debug(f"Upserted {len(new_listings)} new listings")
+    logger.debug("Starting to fetch floorplans")
     await dump_images_module.dump_images(repository, image_base_path=data_dir)
+    logger.debug("Completed fetching floorplans")
+    logger.debug("Starting floorplan detection")
     await detect_floorplan_module.detect_floorplan(repository)
+    logger.debug("Completed floorplan detection")
     # refresh listings
     listings = await repository.get_listings(parameters)  # this can be better
     new_listings = [l for l in listings if l.id in new_listings]
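The refresh step re-reads all listings and filters with l.id in new_listings, testing an id against a list of model objects, which leans on modelListing's equality semantics; the author's own comment flags it ("this can be better"). A hedged, runnable sketch of one alternative, using an explicit id set and stand-in data:

from dataclasses import dataclass

@dataclass
class FakeListing:  # stand-in for modelListing
    id: int

new_listings = [FakeListing(1), FakeListing(2)]
listings = [FakeListing(i) for i in range(5)]  # pretend refreshed rows

# An explicit id set avoids relying on model __eq__ and gives O(1) lookups.
new_ids = {listing.id for listing in new_listings}
refreshed = [l for l in listings if l.id in new_ids]
print([l.id for l in refreshed])  # [1, 2]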
@@ -47,7 +52,7 @@ async def dump_listings(
         }
     else:
         districts = get_districts()
-        print("Valid districts to scrape:", districts.keys())
+        logger.debug("Valid districts to scrape:", districts.keys())

     semaphore = asyncio.Semaphore(5)  # if too high, rightmove drops connections
     json_responses: list[list[dict[str, Any]]] = await tqdm.gather(
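One caveat with the print-to-logger swap in this hunk: print joins any number of positional arguments, but logging treats extra arguments as %-style format args. With no placeholder in the message, the new line triggers a "not all arguments converted" formatting error that logging swallows and reports to stderr at emit time. A corrected sketch (the sample district entry is made up):

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("uvicorn.error")

districts = {"islington": "REGION^93965"}  # hypothetical sample entry

# %s defers formatting until the record is actually emitted.
logger.debug("Valid districts to scrape: %s", list(districts.keys()))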
@@ -58,6 +63,7 @@ async def dump_listings(
         desc="Fetching listings",
     )
     json_responses_flat = list(itertools.chain.from_iterable(json_responses))
+    logger.debug(f"Total listings fetched {len(json_responses_flat)}")
     listings: list[Listing] = []
     for response_json in json_responses_flat:
         if response_json == {}:
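A related aside on the new counting line: an f-string is formatted eagerly even when DEBUG records are filtered out, whereas %-style arguments are interpolated only for records that are emitted. Immaterial for a cheap len() call, but worth knowing for costlier messages:

import logging

logging.basicConfig(level=logging.INFO)  # DEBUG filtered out here
logger = logging.getLogger("uvicorn.error")
json_responses_flat: list[dict] = [{}, {}]  # stand-in data

# Eager: the f-string is always built, even though DEBUG is filtered.
logger.debug(f"Total listings fetched {len(json_responses_flat)}")

# Lazy: the format string is interpolated only if the record is emitted.
logger.debug("Total listings fetched %d", len(json_responses_flat))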
@@ -75,6 +81,7 @@ async def dump_listings(
     missing_listing = [
         listing for listing in listings if listing.identifier not in all_listing_ids
     ]
+    logger.debug(f"Fetching details for {len(missing_listing)} missing listings")
     listing_details = await tqdm.gather(
         *[
             _fetch_detail_with_semaphore(semaphore, listing.identifier)
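The detail fetch fans out with tqdm.gather over coroutines that each acquire the shared Semaphore(5), so at most five requests are in flight while the rest wait. A self-contained sketch of that pattern (plain asyncio.gather stands in for tqdm's progress-bar variant; names are illustrative):

import asyncio

async def fetch_detail(identifier: str) -> dict:
    await asyncio.sleep(0.1)  # stand-in for the real HTTP request
    return {"id": identifier}

async def fetch_with_semaphore(sem: asyncio.Semaphore, identifier: str) -> dict:
    async with sem:  # at most five coroutines pass this point at once
        return await fetch_detail(identifier)

async def main() -> None:
    sem = asyncio.Semaphore(5)  # mirrors the "rightmove drops connections" cap
    details = await asyncio.gather(
        *(fetch_with_semaphore(sem, str(i)) for i in range(20))
    )
    print(len(details))  # 20

asyncio.run(main())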
@@ -85,7 +92,9 @@ async def dump_listings(
     for listing, detail in zip(missing_listing, listing_details):
         listing._details_object = detail

+    logger.debug("Dumping listings to fs")
     await dump_listings_to_fs(missing_listing)
+    logger.debug("Upserting listings in db")
     model_listings = await repository.upsert_listings_legacy(
         missing_listing
     )  # upsert in db
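A small robustness note on the pairing step: zip() silently truncates if missing_listing and listing_details ever diverge in length (they should not here, since gather preserves order and count). On Python 3.10+, strict=True turns silent truncation into an error, sketched with stand-in data:

missing_listing = ["listing-a", "listing-b"]
listing_details = [{"price": 1}, {"price": 2}]

# strict=True raises ValueError on a length mismatch instead of
# silently dropping the unmatched tail.
for listing, detail in zip(missing_listing, listing_details, strict=True):
    print(listing, detail)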
@@ -101,6 +110,7 @@ async def _fetch_listings_with_semaphore(
     result = []
     # we don't know how many pages we have but we stop as soon as there's no more
     for page_id in range(1, 3):
+        logger.debug(f"Processing {page_id=} for {district=}")
         # seems like all searches stop at 1500 entries (page_id * page_size)
         async with semaphore:
             try:
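The loop above caps at range(1, 3), i.e. pages 1 and 2, even though its first comment describes stopping only when a page comes back empty (the second comment suggests the site caps results regardless). A sketch of that stated intent, with a stub standing in for the real page fetch:

import asyncio

async def fetch_page(district: str, page_id: int) -> list[dict]:
    # Stub: pretend the site returns two pages and then nothing.
    return [{"district": district, "page": page_id}] if page_id <= 2 else []

async def fetch_all_pages(district: str) -> list[dict]:
    results: list[dict] = []
    page_id = 1
    # Keep paging until an empty response signals the end of the listing.
    while page := await fetch_page(district, page_id):
        results.extend(page)
        page_id += 1
    return results

print(asyncio.run(fetch_all_pages("islington")))  # fake two-page result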