reformat most things
This commit is contained in:
parent
bd7c781adb
commit
835494d29f
7 changed files with 85 additions and 50 deletions
|
|
@ -16,19 +16,24 @@ for listing in listings:
|
|||
log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
|
||||
continue
|
||||
if miles > 7:
|
||||
log.info(f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away")
|
||||
log.info(
|
||||
f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
|
||||
)
|
||||
continue
|
||||
if listing.path_routing_json().exists():
|
||||
log.info(f"Path-Skip: Skipping {listing.identifier} as path routing already exists")
|
||||
log.info(
|
||||
f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
|
||||
)
|
||||
continue
|
||||
if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
|
||||
log.info(f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}")
|
||||
log.info(
|
||||
f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
|
||||
)
|
||||
continue
|
||||
filtered_listings.append(listing)
|
||||
|
||||
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
||||
|
||||
|
||||
# Compute a route from every listing that survived the filters above to the
# fixed BROCK_STREET_LAT_LONG coordinate pair.
# The pair never changes, so it is unpacked once instead of on every iteration.
lat, long = BROCK_STREET_LAT_LONG
for listing in tqdm(filtered_listings):
    # NOTE(review): recalculate=False presumably reuses an existing route
    # result when one is stored -- confirm against Listing.calculate_route.
    listing.calculate_route(lat, long, recalculate=False)
|
||||
|
|
|
|||
|
|
@ -10,4 +10,4 @@ for listing in listings:
|
|||
recalculate_listings.append(listing)
|
||||
|
||||
# Re-run the floorplan OCR estimate for each queued listing, exactly once per
# listing (the call was previously duplicated, doubling the work).
for listing in tqdm(recalculate_listings):
    listing.calculate_sqm_ocr(recalculate=True)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ from rec import floorplan, routing
|
|||
import re
|
||||
import datetime
|
||||
|
||||
|
||||
_DATA_DIR = pathlib.Path("data/rs/")
|
||||
|
||||
|
||||
|
|
@ -59,10 +58,10 @@ class Listing:
|
|||
def path_floorplan_file(self, order, name) -> pathlib.Path:
    """Return the path of one floorplan file, creating its folder if needed.

    Args:
        order: Value used as the file-name prefix.
        name: Value used as the file-name suffix.

    Returns:
        ``self.path_floorplan_folder() / f"{order}_{name}"``.

    Side effects:
        Creates the floorplan folder (and any missing parents) if it does not
        exist yet. The file itself is not created.
    """
    # Resolve the folder once and reuse it for both mkdir and the join.
    folder = self.path_floorplan_folder()
    folder.mkdir(parents=True, exist_ok=True)
    return folder / f"{order}_{name}"
|
||||
|
||||
|
||||
def path_last_seen_listing(self) -> pathlib.Path:
    """Return the path of this listing's ``last_seen.json`` file.

    The file lives directly inside ``self.path_listing()``. Nothing is
    created or read here; only the path is built.
    """
    return self.path_listing() / "last_seen.json"
|
||||
|
||||
|
||||
def dump_listing(self, d: dict):
|
||||
with open(self.path_listing_json(), "w") as f:
|
||||
json.dump(d, f)
|
||||
|
|
@ -121,7 +120,7 @@ class Listing:
|
|||
"estimated_sqm": estimated_sqm,
|
||||
"text": model_output,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
with open(self.path_floorplan_ocr_json(), "w") as f:
|
||||
json.dump(objs, f)
|
||||
|
|
@ -168,7 +167,7 @@ class Listing:
|
|||
if self._cached is None:
|
||||
with open(self.path_listing_json()) as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
@property
|
||||
def detailobject(self):
|
||||
if self._cached is None:
|
||||
|
|
@ -179,7 +178,7 @@ class Listing:
|
|||
@property
def price(self) -> float:
    """Return the value stored at ``detailobject["property"]["price"]``.

    Raises:
        KeyError: If either key is missing from the detail object.
    """
    return self.detailobject["property"]["price"]
|
||||
|
||||
|
||||
@property
def tenure_type(self) -> str:
    """Return the value stored at ``detailobject["property"]["tenureType"]``.

    Raises:
        KeyError: If either key is missing from the detail object.
    """
    return self.detailobject["property"]["tenureType"]
|
||||
|
|
@ -211,7 +210,7 @@ class Listing:
|
|||
if len(matches):
|
||||
return float(matches[0])
|
||||
return None
|
||||
|
||||
|
||||
@property
|
||||
def updateDaysAgo(self) -> int:
|
||||
ts = self.detailobject["property"]["updateDate"] / 1000
|
||||
|
|
@ -223,7 +222,7 @@ class Listing:
|
|||
def last_seen(self) -> int:
|
||||
if not self.path_last_seen_listing().exists():
|
||||
return None
|
||||
|
||||
|
||||
with open(self.path_last_seen_listing(), 'r') as f:
|
||||
datetime_str = json.load(f)
|
||||
dt = datetime.datetime.fromisoformat(datetime_str)
|
||||
|
|
@ -249,11 +248,11 @@ class Listing:
|
|||
except:
|
||||
print(self.identifier)
|
||||
return False
|
||||
|
||||
|
||||
@property
def isRemoved(self) -> bool:
    """Return True when the listing's detail object is flagged as not visible.

    This is the negation of ``detailobject["property"]["visible"]``.

    Raises:
        KeyError: If either key is missing from the detail object.
    """
    return not self.detailobject["property"]["visible"]
|
||||
|
||||
|
||||
@property
|
||||
def status(self) -> str:
|
||||
if self.isRemoved:
|
||||
|
|
@ -263,25 +262,36 @@ class Listing:
|
|||
|
||||
def dict_nicely(self):
    """Return a flat dict summarising this listing.

    Each key is defined exactly once; repeated keys in a dict literal are
    silently collapsed by Python (the last one wins), which hides mistakes.

    ``travel_time_fastest`` and ``travel_time_second`` are the first and
    second elements of ``self.travel_time``, or ``None`` when that sequence
    is too short.
    NOTE(review): the key names suggest ``travel_time`` is sorted ascending;
    confirm where it is produced.
    """
    # Read the attribute once so a computed property is not evaluated up to
    # four times while building the dict.
    travel_times = self.travel_time
    return {
        "identifier": self.identifier,
        "sqm_ocr": self.sqm_ocr,
        "price": self.price,
        "price_per_sqm": self.price_per_sqm,
        "url": self.url,
        "bedrooms": self.bedrooms,
        "travel_time_fastest": travel_times[0] if len(travel_times) > 0 else None,
        "travel_time_second": travel_times[1] if len(travel_times) > 1 else None,
        "lease_left": self.leaseLeft,
        "service_charge": self.serviceCharge,
        "development": self.development,
        "tenure_type": self.tenure_type,
        "updated_days": self.updateDaysAgo,
        "status": self.status,
        "last_seen": self.last_seen,
    }
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ from sqlalchemy.orm import declarative_base
|
|||
engine = create_engine("sqlite:///sqlite.db", echo=True)
|
||||
session = Session(engine)
|
||||
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -34,12 +34,16 @@ def calculate_model(image_path):
|
|||
estimated_sqm = extract_total_sqm(output)
|
||||
return estimated_sqm, output, predictions_tensor
|
||||
|
||||
|
||||
def improve_img_for_ocr(img: Image):
    """Return a binarised, slightly enlarged copy of ``img`` for OCR.

    Steps:
      1. Convert to mode ``'L'`` and load it into a NumPy array.
      2. Scale both axes by 1.2 with cubic interpolation.
      3. Apply Gaussian adaptive thresholding (block size 11, constant 2).

    Args:
        img: Image object exposing ``convert``.
            NOTE(review): the ``Image`` annotation looks like the PIL module
            rather than the ``Image.Image`` class -- confirm and tighten.

    Returns:
        A new image built from the thresholded array via ``Image.fromarray``.
    """
    gray = np.array(img.convert('L'))
    # cv2.resize returns a new array rather than resizing in place, so the
    # result must be kept; discarding it silently skips the upscale.
    gray = cv2.resize(gray, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    return Image.fromarray(thresh)
|
||||
|
||||
|
||||
def calculate_ocr(image_path):
|
||||
img = Image.open(image_path)
|
||||
text = pytesseract.image_to_string(img)
|
||||
|
|
@ -51,5 +55,5 @@ def calculate_ocr(image_path):
|
|||
with open("recalculating.log", "a") as f:
|
||||
f.write(f"before: {estimated_sqm} after: {estimated_sqm2} - {image_path}\n")
|
||||
return estimated_sqm2, text2
|
||||
|
||||
|
||||
return estimated_sqm, text
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@
|
|||
import enum
|
||||
from typing import List
|
||||
import requests
|
||||
# from rec.db import RightmoveListing
|
||||
|
||||
from rec.db import RightmoveListing
|
||||
import urllib3
|
||||
|
||||
urllib3.disable_warnings()
|
||||
|
|
@ -36,7 +35,10 @@ def detail_query(detail_id: int):
|
|||
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
||||
response = requests.get(url, params=params, headers=headers, verify=False)
|
||||
if response.status_code != 200:
|
||||
raise Exception(f"id: {detail_id}. Status Code: {response.status_code}. Failed due to: {response.text}")
|
||||
raise Exception(
|
||||
f"""id: {detail_id}. Status Code: {response.status_code}."""
|
||||
f"""Failed due to: {response.text}"""
|
||||
)
|
||||
|
||||
return response.json()
|
||||
|
||||
|
|
|
|||
|
|
@ -16,18 +16,34 @@ def transit_route(
|
|||
monday9am = nextMonday()
|
||||
|
||||
header = {
|
||||
"X-Goog-Api-Key": API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-FieldMask": "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||
"X-Goog-Api-Key":
|
||||
API_KEY,
|
||||
"Content-Type":
|
||||
"application/json",
|
||||
"X-Goog-FieldMask":
|
||||
"routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode",
|
||||
}
|
||||
|
||||
body = {
|
||||
"origin": {
|
||||
"location": {"latLng": {"latitude": origin_lat, "longitude": origin_lon}}
|
||||
},
|
||||
"destination": {
|
||||
"location": {"latLng": {"latitude": dest_lat, "longitude": dest_lon}}
|
||||
},
|
||||
"origin":
|
||||
{
|
||||
"location":
|
||||
{
|
||||
"latLng": {
|
||||
"latitude": origin_lat,
|
||||
"longitude": origin_lon
|
||||
}
|
||||
}
|
||||
},
|
||||
"destination":
|
||||
{
|
||||
"location": {
|
||||
"latLng": {
|
||||
"latitude": dest_lat,
|
||||
"longitude": dest_lon
|
||||
}
|
||||
}
|
||||
},
|
||||
"travelMode": "TRANSIT",
|
||||
# "2023-10-15T15:01:23.045123456Z"
|
||||
"departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
|
|
@ -97,7 +113,6 @@ if __name__ == "__main__":
|
|||
|
||||
extract_time(d)
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# origin = 51.5635664310333, -0.1107173751570373 # home
|
||||
# dest = 51.50475678313417, 0.04915321000190009 # london city airport
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue