reformat most things

This commit is contained in:
Viktor Barzin 2025-05-07 21:25:40 +00:00
parent bd7c781adb
commit 835494d29f
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
7 changed files with 85 additions and 50 deletions

View file

@ -16,19 +16,24 @@ for listing in listings:
log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
continue
if miles > 7:
log.info(f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away")
log.info(
f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
)
continue
if listing.path_routing_json().exists():
log.info(f"Path-Skip: Skipping {listing.identifier} as path routing already exists")
log.info(
f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
)
continue
if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
log.info(f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}")
log.info(
f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
)
continue
filtered_listings.append(listing)
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
for listing in tqdm(filtered_listings):
lat, long = BROCK_STREET_LAT_LONG
listing.calculate_route(lat, long, recalculate=False)

View file

@ -10,4 +10,4 @@ for listing in listings:
recalculate_listings.append(listing)
for listing in tqdm(recalculate_listings):
listing.calculate_sqm_ocr(recalculate=True)
listing.calculate_sqm_ocr(recalculate=True)

View file

@ -6,7 +6,6 @@ from rec import floorplan, routing
import re
import datetime
_DATA_DIR = pathlib.Path("data/rs/")
@ -59,10 +58,10 @@ class Listing:
def path_floorplan_file(self, order, name) -> pathlib.Path:
self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
return self.path_floorplan_folder() / f"{order}_{name}"
def path_last_seen_listing(self) -> pathlib.Path:
return self.path_listing() / "last_seen.json"
def dump_listing(self, d: dict):
with open(self.path_listing_json(), "w") as f:
json.dump(d, f)
@ -121,7 +120,7 @@ class Listing:
"estimated_sqm": estimated_sqm,
"text": model_output,
}
)
)
with open(self.path_floorplan_ocr_json(), "w") as f:
json.dump(objs, f)
@ -168,7 +167,7 @@ class Listing:
if self._cached is None:
with open(self.path_listing_json()) as f:
return json.load(f)
@property
def detailobject(self):
if self._cached is None:
@ -179,7 +178,7 @@ class Listing:
@property
def price(self) -> float:
return self.detailobject["property"]["price"]
@property
def tenure_type(self) -> str:
return self.detailobject["property"]["tenureType"]
@ -211,7 +210,7 @@ class Listing:
if len(matches):
return float(matches[0])
return None
@property
def updateDaysAgo(self) -> int:
ts = self.detailobject["property"]["updateDate"] / 1000
@ -223,7 +222,7 @@ class Listing:
def last_seen(self) -> int:
if not self.path_last_seen_listing().exists():
return None
with open(self.path_last_seen_listing(), 'r') as f:
datetime_str = json.load(f)
dt = datetime.datetime.fromisoformat(datetime_str)
@ -249,11 +248,11 @@ class Listing:
except:
print(self.identifier)
return False
@property
def isRemoved(self) -> bool:
return not self.detailobject["property"]["visible"]
@property
def status(self) -> str:
if self.isRemoved:
@ -263,25 +262,36 @@ class Listing:
def dict_nicely(self):
return {
"identifier": self.identifier,
"sqm_ocr": self.sqm_ocr,
"price": self.price,
"price_per_sqm": self.price_per_sqm,
"url": self.url,
"bedrooms": self.bedrooms,
"travel_time_fastest": None
if len(self.travel_time) == 0
else self.travel_time[0],
"travel_time_second": None
if len(self.travel_time) < 2
else self.travel_time[1],
"lease_left": self.leaseLeft,
"service_charge": self.serviceCharge,
"development": self.development,
"tenure_type": self.tenure_type,
"updated_days": self.updateDaysAgo,
"status": self.status,
"last_seen": self.last_seen,
"identifier":
self.identifier,
"sqm_ocr":
self.sqm_ocr,
"price":
self.price,
"price_per_sqm":
self.price_per_sqm,
"url":
self.url,
"bedrooms":
self.bedrooms,
"travel_time_fastest":
None if len(self.travel_time) == 0 else self.travel_time[0],
"travel_time_second":
None if len(self.travel_time) < 2 else self.travel_time[1],
"lease_left":
self.leaseLeft,
"service_charge":
self.serviceCharge,
"development":
self.development,
"tenure_type":
self.tenure_type,
"updated_days":
self.updateDaysAgo,
"status":
self.status,
"last_seen":
self.last_seen,
}

View file

@ -6,7 +6,6 @@ from sqlalchemy.orm import declarative_base
engine = create_engine("sqlite:///sqlite.db", echo=True)
session = Session(engine)
Base = declarative_base()

View file

@ -34,12 +34,16 @@ def calculate_model(image_path):
estimated_sqm = extract_total_sqm(output)
return estimated_sqm, output, predictions_tensor
def improve_img_for_ocr(img: Image):
img2 = np.array(img.convert('L'))
cv2.resize(img2, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
thresh = cv2.adaptiveThreshold(img2,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,11,2)
thresh = cv2.adaptiveThreshold(
img2, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
)
return Image.fromarray(thresh)
def calculate_ocr(image_path):
img = Image.open(image_path)
text = pytesseract.image_to_string(img)
@ -51,5 +55,5 @@ def calculate_ocr(image_path):
with open("recalculating.log", "a") as f:
f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n")
return estimated_sqm2, text2
return estimated_sqm, text

View file

@ -2,8 +2,7 @@
import enum
from typing import List
import requests
# from rec.db import RightmoveListing
from rec.db import RightmoveListing
import urllib3
urllib3.disable_warnings()
@ -36,7 +35,10 @@ def detail_query(detail_id: int):
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
response = requests.get(url, params=params, headers=headers, verify=False)
if response.status_code != 200:
raise Exception(f"id: {detail_id}. Status Code: {response.status_code}. Failed due to: {response.text}")
raise Exception(
f"""id: {detail_id}. Status Code: {response.status_code}."""
f"""Failed due to: {response.text}"""
)
return response.json()

View file

@ -16,18 +16,34 @@ def transit_route(
monday9am = nextMonday()
header = {
"X-Goog-Api-Key": API_KEY,
"Content-Type": "application/json",
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
"X-Goog-Api-Key":
API_KEY,
"Content-Type":
"application/json",
"X-Goog-FieldMask":
"routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
}
body = {
"origin": {
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
},
"destination": {
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
},
"origin":
{
"location":
{
"latLng": {
"latitude": origin_lat,
"longitude": origin_lon
}
}
},
"destination":
{
"location": {
"latLng": {
"latitude": dest_lat,
"longitude": dest_lon
}
}
},
"travelMode": "TRANSIT",
# "2023-10-15T15:01:23.045123456Z"
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
@ -97,7 +113,6 @@ if __name__ == "__main__":
extract_time(d)
# if __name__ == "__main__":
# origin = 51.5635664310333, -0.1107173751570373 # home
# dest = 51.50475678313417, 0.04915321000190009 # london city airport