some cleanups

This commit is contained in:
Viktor Barzin 2025-06-08 20:58:28 +00:00
parent 831c45e1f5
commit 289206afc0
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
8 changed files with 20 additions and 17 deletions

View file

@@ -5,7 +5,6 @@ from typing import Any
from rec.query import detail_query, listing_query, QueryParameters
from rec.districts import get_districts
from repositories import ListingRepository
from sqlalchemy import Engine
from tqdm.asyncio import tqdm
from data_access import Listing
from models import Listing as modelListing
@@ -61,13 +60,13 @@ async def dump_listings(
for listing in listings_without_details
if listing.identifier not in all_listing_ids
],
desc="Fetching details",
desc="Fetching details (only missing)",
)
for listing, detail in zip(listings_without_details, listing_details):
listing._details_object = detail
model_listings = await repository.upsert_listings_legacy(listings) # upsert in db
await dump_listings_to_fs(listings)
model_listings = await repository.upsert_listings_legacy(listings) # upsert in db
return model_listings

View file

@@ -5,7 +5,6 @@ import aiohttp
from repositories import ListingRepository
from tqdm.asyncio import tqdm
# from data_access import Listing
from models import Listing
# Setting this too high either crashes rightmove or gets us blocked

View file

@@ -263,9 +263,13 @@ class Listing:
if self.path_detail_json().exists():
with open(self.path_detail_json()) as f:
self._details_object = json.load(f)
return self._details_object # type: ignore
else:
return {}
return self._details_object # type: ignore
raise ValueError(
f"Detail object for listing {self.identifier} not found."
)
else:
return self._details_object
@property
def price(self) -> float:

View file

@@ -44,9 +44,9 @@ def listing_filter_options(func):
)
@click.option(
"--max-bedrooms",
default=5,
default=10,
help="Maximum number of bedrooms",
type=click.IntRange(min=1),
type=click.IntRange(min=1, max=10), # Right move gets unhappy with >10
)
@click.option(
"--min-price",
@@ -56,9 +56,9 @@ def listing_filter_options(func):
)
@click.option(
"--max-price",
default=1000000,
default=999_999,
help="Maximum price",
type=click.IntRange(min=0),
type=click.IntRange(min=0, max=40_000), # 40k for renting
)
@click.option(
"--district",
@@ -359,7 +359,7 @@ def populate_db(
listings = Listing.get_all_listings(
[path for path in pathlib.Path(data_dir).glob("*/listing.json")]
)
asyncio.run(repository.upsert_listings(listings))
asyncio.run(repository.upsert_listings_legacy(listings))
if __name__ == "__main__":

View file

@@ -239,7 +239,7 @@ class QueryParameters:
district_names: set[str] = dataclasses.field(default_factory=set)
radius: float = 0
page_size: int = 500 # items per page
max_days_since_added: int = 30
max_days_since_added: int = 14 # for buy listings
furnish_types: list[FurnishType] | None = None
# The values below are not supported by rightmove
# hence we apply them after fetching

View file

@@ -84,7 +84,10 @@ async def listing_query(
7,
14,
]:
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
raise Exception(
f"Invalid max days - {max_days_since_added} Can only be got",
[1, 3, 7, 14],
)
params["maxDaysSinceAdded"] = str(max_days_since_added)
if mustNewHome:

View file

@@ -2,9 +2,9 @@
set -euxo pipefail
DATA_DIR="data/rs/test"
DATA_DIR="data/rs"
LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished"
LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 4000 --min-bedrooms 2 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished"
#LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 2500 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished --district Islington" # DEBUG: UNCOMMENT ME WHEN TESTING

View file

@@ -1,8 +1,6 @@
import dataclasses
import json
import pathlib
from data_access import Listing
from rec.query import QueryParameters
from repositories.listing_repository import ListingRepository