From 6d343e52e7f092cb275d4d18d9e340380f00a5e8 Mon Sep 17 00:00:00 2001
From: Kadir
Date: Sun, 11 Aug 2024 19:36:25 +0100
Subject: [PATCH] adding days updated

Add Listing.updateDaysAgo: the number of whole days elapsed since the
listing's "updateDate" (the value is divided by 1000 before being used as a
POSIX timestamp, i.e. it is assumed to be epoch milliseconds). The value is
added to the Listing field dictionary under the key "updated_days". Both
ends of the subtraction are timezone-aware UTC datetimes so that a DST
change between the two instants cannot shift the day count.

Keep the image dump running when a single download fails: the failing URL
is reported together with the actual error. Only Exception is caught, so
Ctrl-C (KeyboardInterrupt) still stops the crawl.
---
Review notes (text between "---" and the diffstat is ignored by git am):
- This patch was received with its line breaks and indentation collapsed.
  The indentation of context lines and the placement of blank context
  lines below are reconstructed and must be confirmed against the tree
  (or the patch regenerated with `git format-patch`) before applying.
- The last hunk carries only the context lines that were visible.
- The post-image blob ids on the "index" lines were not recomputed after
  the review changes.

 crawler/3_dump_images.py |  5 ++++-
 crawler/data_access.py   | 10 ++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py
index 8e84217..5792b5b 100644
--- a/crawler/3_dump_images.py
+++ b/crawler/3_dump_images.py
@@ -25,4 +25,7 @@ for listing in tqdm(Listing.get_all_listings()):
         if p.exists():
             continue
         tqdm.write(str(p))
-        urlretrieve(url, p)
+        try:
+            urlretrieve(url, p)
+        except Exception as e:
+            tqdm.write(f"download failed for {url}: {e}")
diff --git a/crawler/data_access.py b/crawler/data_access.py
index 9e985eb..9ed4e7b 100644
--- a/crawler/data_access.py
+++ b/crawler/data_access.py
@@ -4,6 +4,8 @@ import pathlib
 from typing import List, Dict
 from rec import floorplan, routing
 import re
+import datetime
+
 
 _DATA_DIR = pathlib.Path("data/rs/")
 
@@ -193,6 +195,13 @@ class Listing:
         if len(matches):
             return float(matches[0])
         return None
+
+    @property
+    def updateDaysAgo(self) -> int:
+        ts = self.detailobject["property"]["updateDate"] / 1000  # assumes epoch milliseconds
+        now = datetime.datetime.now(datetime.timezone.utc)
+        ds = datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
+        return (now - ds).days
 
     @property
     def serviceCharge(self) -> float:
@@ -233,4 +242,5 @@ class Listing:
             "service_charge": self.serviceCharge,
             "development": self.development,
             "tenure_type": self.tenure_type,
+            "updated_days": self.updateDaysAgo,
         }