From f98cd0269635635519868d15fbf162629aeb97a6 Mon Sep 17 00:00:00 2001 From: Kadir Date: Mon, 6 May 2024 18:54:55 +0100 Subject: [PATCH] adding better exception messages and interrupt free crawling --- crawler/2_dump_detail.py | 5 ++--- crawler/data_access.py | 5 +++++ crawler/rec/query.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/crawler/2_dump_detail.py b/crawler/2_dump_detail.py index 62cad80..4a1c295 100644 --- a/crawler/2_dump_detail.py +++ b/crawler/2_dump_detail.py @@ -15,6 +15,5 @@ for listing in tqdm(filtered_listings): d = detail_query(listing.identifier) with open(listing.path_detail_json(), "w") as f: json.dump(d, f) - except: - print("Failed at: ", listing.identifier) - raise + except Exception as e: + print(e) diff --git a/crawler/data_access.py b/crawler/data_access.py index f0aedef..9e985eb 100644 --- a/crawler/data_access.py +++ b/crawler/data_access.py @@ -161,6 +161,10 @@ class Listing: @property def price(self) -> float: return self.detailobject["property"]["price"] + + @property + def tenure_type(self) -> str: + return self.detailobject["property"]["tenureType"] @property def price_per_sqm(self) -> float: @@ -228,6 +232,7 @@ class Listing: "lease_left": self.leaseLeft, "service_charge": self.serviceCharge, "development": self.development, + "tenure_type": self.tenure_type, } diff --git a/crawler/rec/query.py b/crawler/rec/query.py index 4e5c4e3..fb67f98 100644 --- a/crawler/rec/query.py +++ b/crawler/rec/query.py @@ -36,7 +36,7 @@ def detail_query(detail_id: int): url = f"https://api.rightmove.co.uk/api/property/{detail_id}" response = requests.get(url, params=params, headers=headers, verify=False) if response.status_code != 200: - raise Exception("Failed due to: ", response.text) + raise Exception(f"id: {detail_id}. Status Code: {response.status_code}. Failed due to: {response.text}") return response.json()