reformat most things

This commit is contained in:
Viktor Barzin 2025-05-07 21:25:40 +00:00
parent bd7c781adb
commit 835494d29f
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
7 changed files with 85 additions and 50 deletions

View file

@ -16,19 +16,24 @@ for listing in listings:
log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.") log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
continue continue
if miles > 7: if miles > 7:
log.info(f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away") log.info(
f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
)
continue continue
if listing.path_routing_json().exists(): if listing.path_routing_json().exists():
log.info(f"Path-Skip: Skipping {listing.identifier} as path routing already exists") log.info(
f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
)
continue continue
if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200: if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
log.info(f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}") log.info(
f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
)
continue continue
filtered_listings.append(listing) filtered_listings.append(listing)
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}") print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
for listing in tqdm(filtered_listings): for listing in tqdm(filtered_listings):
lat, long = BROCK_STREET_LAT_LONG lat, long = BROCK_STREET_LAT_LONG
listing.calculate_route(lat, long, recalculate=False) listing.calculate_route(lat, long, recalculate=False)

View file

@ -6,7 +6,6 @@ from rec import floorplan, routing
import re import re
import datetime import datetime
_DATA_DIR = pathlib.Path("data/rs/") _DATA_DIR = pathlib.Path("data/rs/")
@ -263,25 +262,36 @@ class Listing:
def dict_nicely(self): def dict_nicely(self):
return { return {
"identifier": self.identifier, "identifier":
"sqm_ocr": self.sqm_ocr, self.identifier,
"price": self.price, "sqm_ocr":
"price_per_sqm": self.price_per_sqm, self.sqm_ocr,
"url": self.url, "price":
"bedrooms": self.bedrooms, self.price,
"travel_time_fastest": None "price_per_sqm":
if len(self.travel_time) == 0 self.price_per_sqm,
else self.travel_time[0], "url":
"travel_time_second": None self.url,
if len(self.travel_time) < 2 "bedrooms":
else self.travel_time[1], self.bedrooms,
"lease_left": self.leaseLeft, "travel_time_fastest":
"service_charge": self.serviceCharge, None if len(self.travel_time) == 0 else self.travel_time[0],
"development": self.development, "travel_time_second":
"tenure_type": self.tenure_type, None if len(self.travel_time) < 2 else self.travel_time[1],
"updated_days": self.updateDaysAgo, "lease_left":
"status": self.status, self.leaseLeft,
"last_seen": self.last_seen, "service_charge":
self.serviceCharge,
"development":
self.development,
"tenure_type":
self.tenure_type,
"updated_days":
self.updateDaysAgo,
"status":
self.status,
"last_seen":
self.last_seen,
} }

View file

@ -6,7 +6,6 @@ from sqlalchemy.orm import declarative_base
engine = create_engine("sqlite:///sqlite.db", echo=True) engine = create_engine("sqlite:///sqlite.db", echo=True)
session = Session(engine) session = Session(engine)
Base = declarative_base() Base = declarative_base()

View file

@ -34,12 +34,16 @@ def calculate_model(image_path):
estimated_sqm = extract_total_sqm(output) estimated_sqm = extract_total_sqm(output)
return estimated_sqm, output, predictions_tensor return estimated_sqm, output, predictions_tensor
def improve_img_for_ocr(img: Image): def improve_img_for_ocr(img: Image):
img2 = np.array(img.convert('L')) img2 = np.array(img.convert('L'))
cv2.resize(img2, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC) cv2.resize(img2, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
thresh = cv2.adaptiveThreshold(img2,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,11,2) thresh = cv2.adaptiveThreshold(
img2, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
)
return Image.fromarray(thresh) return Image.fromarray(thresh)
def calculate_ocr(image_path): def calculate_ocr(image_path):
img = Image.open(image_path) img = Image.open(image_path)
text = pytesseract.image_to_string(img) text = pytesseract.image_to_string(img)

View file

@ -2,8 +2,7 @@
import enum import enum
from typing import List from typing import List
import requests import requests
# from rec.db import RightmoveListing from rec.db import RightmoveListing
import urllib3 import urllib3
urllib3.disable_warnings() urllib3.disable_warnings()
@ -36,7 +35,10 @@ def detail_query(detail_id: int):
url = f"https://api.rightmove.co.uk/api/property/{detail_id}" url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
response = requests.get(url, params=params, headers=headers, verify=False) response = requests.get(url, params=params, headers=headers, verify=False)
if response.status_code != 200: if response.status_code != 200:
raise Exception(f"id: {detail_id}. Status Code: {response.status_code}. Failed due to: {response.text}") raise Exception(
f"""id: {detail_id}. Status Code: {response.status_code}."""
f"""Failed due to: {response.text}"""
)
return response.json() return response.json()

View file

@ -16,18 +16,34 @@ def transit_route(
monday9am = nextMonday() monday9am = nextMonday()
header = { header = {
"X-Goog-Api-Key": API_KEY, "X-Goog-Api-Key":
"Content-Type": "application/json", API_KEY,
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode", "Content-Type":
"application/json",
"X-Goog-FieldMask":
"routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
} }
body = { body = {
"origin": { "origin":
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}} {
}, "location":
"destination": { {
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}} "latLng": {
}, "latitude": origin_lat,
"longitude": origin_lon
}
}
},
"destination":
{
"location": {
"latLng": {
"latitude": dest_lat,
"longitude": dest_lon
}
}
},
"travelMode": "TRANSIT", "travelMode": "TRANSIT",
# "2023-10-15T15:01:23.045123456Z" # "2023-10-15T15:01:23.045123456Z"
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), "departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
@ -97,7 +113,6 @@ if __name__ == "__main__":
extract_time(d) extract_time(d)
# if __name__ == "__main__": # if __name__ == "__main__":
# origin = 51.5635664310333, -0.1107173751570373 # home # origin = 51.5635664310333, -0.1107173751570373 # home
# dest = 51.50475678313417, 0.04915321000190009 # london city airport # dest = 51.50475678313417, 0.04915321000190009 # london city airport