reformat most things
This commit is contained in:
parent
bd7c781adb
commit
835494d29f
7 changed files with 85 additions and 50 deletions
|
|
@ -16,19 +16,24 @@ for listing in listings:
|
||||||
log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
|
log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
|
||||||
continue
|
continue
|
||||||
if miles > 7:
|
if miles > 7:
|
||||||
log.info(f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away")
|
log.info(
|
||||||
|
f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
if listing.path_routing_json().exists():
|
if listing.path_routing_json().exists():
|
||||||
log.info(f"Path-Skip: Skipping {listing.identifier} as path routing already exists")
|
log.info(
|
||||||
|
f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
|
if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
|
||||||
log.info(f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}")
|
log.info(
|
||||||
|
f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
filtered_listings.append(listing)
|
filtered_listings.append(listing)
|
||||||
|
|
||||||
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
||||||
|
|
||||||
|
|
||||||
for listing in tqdm(filtered_listings):
|
for listing in tqdm(filtered_listings):
|
||||||
lat, long = BROCK_STREET_LAT_LONG
|
lat, long = BROCK_STREET_LAT_LONG
|
||||||
listing.calculate_route(lat, long, recalculate=False)
|
listing.calculate_route(lat, long, recalculate=False)
|
||||||
|
|
|
||||||
|
|
@ -10,4 +10,4 @@ for listing in listings:
|
||||||
recalculate_listings.append(listing)
|
recalculate_listings.append(listing)
|
||||||
|
|
||||||
for listing in tqdm(recalculate_listings):
|
for listing in tqdm(recalculate_listings):
|
||||||
listing.calculate_sqm_ocr(recalculate=True)
|
listing.calculate_sqm_ocr(recalculate=True)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ from rec import floorplan, routing
|
||||||
import re
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
|
||||||
_DATA_DIR = pathlib.Path("data/rs/")
|
_DATA_DIR = pathlib.Path("data/rs/")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -59,10 +58,10 @@ class Listing:
|
||||||
def path_floorplan_file(self, order, name) -> pathlib.Path:
|
def path_floorplan_file(self, order, name) -> pathlib.Path:
|
||||||
self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
|
self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
|
||||||
return self.path_floorplan_folder() / f"{order}_{name}"
|
return self.path_floorplan_folder() / f"{order}_{name}"
|
||||||
|
|
||||||
def path_last_seen_listing(self) -> pathlib.Path:
|
def path_last_seen_listing(self) -> pathlib.Path:
|
||||||
return self.path_listing() / "last_seen.json"
|
return self.path_listing() / "last_seen.json"
|
||||||
|
|
||||||
def dump_listing(self, d: dict):
|
def dump_listing(self, d: dict):
|
||||||
with open(self.path_listing_json(), "w") as f:
|
with open(self.path_listing_json(), "w") as f:
|
||||||
json.dump(d, f)
|
json.dump(d, f)
|
||||||
|
|
@ -121,7 +120,7 @@ class Listing:
|
||||||
"estimated_sqm": estimated_sqm,
|
"estimated_sqm": estimated_sqm,
|
||||||
"text": model_output,
|
"text": model_output,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(self.path_floorplan_ocr_json(), "w") as f:
|
with open(self.path_floorplan_ocr_json(), "w") as f:
|
||||||
json.dump(objs, f)
|
json.dump(objs, f)
|
||||||
|
|
@ -168,7 +167,7 @@ class Listing:
|
||||||
if self._cached is None:
|
if self._cached is None:
|
||||||
with open(self.path_listing_json()) as f:
|
with open(self.path_listing_json()) as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def detailobject(self):
|
def detailobject(self):
|
||||||
if self._cached is None:
|
if self._cached is None:
|
||||||
|
|
@ -179,7 +178,7 @@ class Listing:
|
||||||
@property
|
@property
|
||||||
def price(self) -> float:
|
def price(self) -> float:
|
||||||
return self.detailobject["property"]["price"]
|
return self.detailobject["property"]["price"]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tenure_type(self) -> str:
|
def tenure_type(self) -> str:
|
||||||
return self.detailobject["property"]["tenureType"]
|
return self.detailobject["property"]["tenureType"]
|
||||||
|
|
@ -211,7 +210,7 @@ class Listing:
|
||||||
if len(matches):
|
if len(matches):
|
||||||
return float(matches[0])
|
return float(matches[0])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def updateDaysAgo(self) -> int:
|
def updateDaysAgo(self) -> int:
|
||||||
ts = self.detailobject["property"]["updateDate"] / 1000
|
ts = self.detailobject["property"]["updateDate"] / 1000
|
||||||
|
|
@ -223,7 +222,7 @@ class Listing:
|
||||||
def last_seen(self) -> int:
|
def last_seen(self) -> int:
|
||||||
if not self.path_last_seen_listing().exists():
|
if not self.path_last_seen_listing().exists():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
with open(self.path_last_seen_listing(), 'r') as f:
|
with open(self.path_last_seen_listing(), 'r') as f:
|
||||||
datetime_str = json.load(f)
|
datetime_str = json.load(f)
|
||||||
dt = datetime.datetime.fromisoformat(datetime_str)
|
dt = datetime.datetime.fromisoformat(datetime_str)
|
||||||
|
|
@ -249,11 +248,11 @@ class Listing:
|
||||||
except:
|
except:
|
||||||
print(self.identifier)
|
print(self.identifier)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def isRemoved(self) -> bool:
|
def isRemoved(self) -> bool:
|
||||||
return not self.detailobject["property"]["visible"]
|
return not self.detailobject["property"]["visible"]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def status(self) -> str:
|
def status(self) -> str:
|
||||||
if self.isRemoved:
|
if self.isRemoved:
|
||||||
|
|
@ -263,25 +262,36 @@ class Listing:
|
||||||
|
|
||||||
def dict_nicely(self):
|
def dict_nicely(self):
|
||||||
return {
|
return {
|
||||||
"identifier": self.identifier,
|
"identifier":
|
||||||
"sqm_ocr": self.sqm_ocr,
|
self.identifier,
|
||||||
"price": self.price,
|
"sqm_ocr":
|
||||||
"price_per_sqm": self.price_per_sqm,
|
self.sqm_ocr,
|
||||||
"url": self.url,
|
"price":
|
||||||
"bedrooms": self.bedrooms,
|
self.price,
|
||||||
"travel_time_fastest": None
|
"price_per_sqm":
|
||||||
if len(self.travel_time) == 0
|
self.price_per_sqm,
|
||||||
else self.travel_time[0],
|
"url":
|
||||||
"travel_time_second": None
|
self.url,
|
||||||
if len(self.travel_time) < 2
|
"bedrooms":
|
||||||
else self.travel_time[1],
|
self.bedrooms,
|
||||||
"lease_left": self.leaseLeft,
|
"travel_time_fastest":
|
||||||
"service_charge": self.serviceCharge,
|
None if len(self.travel_time) == 0 else self.travel_time[0],
|
||||||
"development": self.development,
|
"travel_time_second":
|
||||||
"tenure_type": self.tenure_type,
|
None if len(self.travel_time) < 2 else self.travel_time[1],
|
||||||
"updated_days": self.updateDaysAgo,
|
"lease_left":
|
||||||
"status": self.status,
|
self.leaseLeft,
|
||||||
"last_seen": self.last_seen,
|
"service_charge":
|
||||||
|
self.serviceCharge,
|
||||||
|
"development":
|
||||||
|
self.development,
|
||||||
|
"tenure_type":
|
||||||
|
self.tenure_type,
|
||||||
|
"updated_days":
|
||||||
|
self.updateDaysAgo,
|
||||||
|
"status":
|
||||||
|
self.status,
|
||||||
|
"last_seen":
|
||||||
|
self.last_seen,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ from sqlalchemy.orm import declarative_base
|
||||||
engine = create_engine("sqlite:///sqlite.db", echo=True)
|
engine = create_engine("sqlite:///sqlite.db", echo=True)
|
||||||
session = Session(engine)
|
session = Session(engine)
|
||||||
|
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,12 +34,16 @@ def calculate_model(image_path):
|
||||||
estimated_sqm = extract_total_sqm(output)
|
estimated_sqm = extract_total_sqm(output)
|
||||||
return estimated_sqm, output, predictions_tensor
|
return estimated_sqm, output, predictions_tensor
|
||||||
|
|
||||||
|
|
||||||
def improve_img_for_ocr(img: Image):
|
def improve_img_for_ocr(img: Image):
|
||||||
img2 = np.array(img.convert('L'))
|
img2 = np.array(img.convert('L'))
|
||||||
cv2.resize(img2, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
|
cv2.resize(img2, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
|
||||||
thresh = cv2.adaptiveThreshold(img2,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,11,2)
|
thresh = cv2.adaptiveThreshold(
|
||||||
|
img2, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
|
||||||
|
)
|
||||||
return Image.fromarray(thresh)
|
return Image.fromarray(thresh)
|
||||||
|
|
||||||
|
|
||||||
def calculate_ocr(image_path):
|
def calculate_ocr(image_path):
|
||||||
img = Image.open(image_path)
|
img = Image.open(image_path)
|
||||||
text = pytesseract.image_to_string(img)
|
text = pytesseract.image_to_string(img)
|
||||||
|
|
@ -51,5 +55,5 @@ def calculate_ocr(image_path):
|
||||||
with open("recalculating.log", "a") as f:
|
with open("recalculating.log", "a") as f:
|
||||||
f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n")
|
f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n")
|
||||||
return estimated_sqm2, text2
|
return estimated_sqm2, text2
|
||||||
|
|
||||||
return estimated_sqm, text
|
return estimated_sqm, text
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,7 @@
|
||||||
import enum
|
import enum
|
||||||
from typing import List
|
from typing import List
|
||||||
import requests
|
import requests
|
||||||
# from rec.db import RightmoveListing
|
from rec.db import RightmoveListing
|
||||||
|
|
||||||
import urllib3
|
import urllib3
|
||||||
|
|
||||||
urllib3.disable_warnings()
|
urllib3.disable_warnings()
|
||||||
|
|
@ -36,7 +35,10 @@ def detail_query(detail_id: int):
|
||||||
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
||||||
response = requests.get(url, params=params, headers=headers, verify=False)
|
response = requests.get(url, params=params, headers=headers, verify=False)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise Exception(f"id: {detail_id}. Status Code: {response.status_code}. Failed due to: {response.text}")
|
raise Exception(
|
||||||
|
f"""id: {detail_id}. Status Code: {response.status_code}."""
|
||||||
|
f"""Failed due to: {response.text}"""
|
||||||
|
)
|
||||||
|
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,18 +16,34 @@ def transit_route(
|
||||||
monday9am = nextMonday()
|
monday9am = nextMonday()
|
||||||
|
|
||||||
header = {
|
header = {
|
||||||
"X-Goog-Api-Key": API_KEY,
|
"X-Goog-Api-Key":
|
||||||
"Content-Type": "application/json",
|
API_KEY,
|
||||||
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
"Content-Type":
|
||||||
|
"application/json",
|
||||||
|
"X-Goog-FieldMask":
|
||||||
|
"routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||||
}
|
}
|
||||||
|
|
||||||
body = {
|
body = {
|
||||||
"origin": {
|
"origin":
|
||||||
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
|
{
|
||||||
},
|
"location":
|
||||||
"destination": {
|
{
|
||||||
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
|
"latLng": {
|
||||||
},
|
"latitude": origin_lat,
|
||||||
|
"longitude": origin_lon
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"destination":
|
||||||
|
{
|
||||||
|
"location": {
|
||||||
|
"latLng": {
|
||||||
|
"latitude": dest_lat,
|
||||||
|
"longitude": dest_lon
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"travelMode": "TRANSIT",
|
"travelMode": "TRANSIT",
|
||||||
# "2023-10-15T15:01:23.045123456Z"
|
# "2023-10-15T15:01:23.045123456Z"
|
||||||
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||||
|
|
@ -97,7 +113,6 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
extract_time(d)
|
extract_time(d)
|
||||||
|
|
||||||
|
|
||||||
# if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
# origin = 51.5635664310333, -0.1107173751570373 # home
|
# origin = 51.5635664310333, -0.1107173751570373 # home
|
||||||
# dest = 51.50475678313417, 0.04915321000190009 # london city airport
|
# dest = 51.50475678313417, 0.04915321000190009 # london city airport
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue