From 8b2025e700d92964ae2f544590e29e7b2fe8acc1 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 4 Jun 2025 21:56:26 +0000 Subject: [PATCH] add command to dump existing listing from fs to db --- ...4e3b4590920f_add_more_fields_to_tables.py} | 19 ++--- .../f7486e403e2f_add_more_fields_to_tables.py | 81 +++++++++++++++++++ crawler/data_access.py | 13 +++ crawler/database.py | 2 +- crawler/main.py | 18 ++++- crawler/models/listing.py | 1 + 6 files changed, 121 insertions(+), 13 deletions(-) rename crawler/alembic/versions/{0e804449c31d_create_listing_table.py => 4e3b4590920f_add_more_fields_to_tables.py} (62%) create mode 100644 crawler/alembic/versions/f7486e403e2f_add_more_fields_to_tables.py diff --git a/crawler/alembic/versions/0e804449c31d_create_listing_table.py b/crawler/alembic/versions/4e3b4590920f_add_more_fields_to_tables.py similarity index 62% rename from crawler/alembic/versions/0e804449c31d_create_listing_table.py rename to crawler/alembic/versions/4e3b4590920f_add_more_fields_to_tables.py index 13ccb3a..2956453 100644 --- a/crawler/alembic/versions/0e804449c31d_create_listing_table.py +++ b/crawler/alembic/versions/4e3b4590920f_add_more_fields_to_tables.py @@ -1,8 +1,8 @@ -"""Create listing table +"""add more fields to tables -Revision ID: 0e804449c31d -Revises: -Create Date: 2025-06-03 19:54:41.526943 +Revision ID: 4e3b4590920f +Revises: f7486e403e2f +Create Date: 2025-06-04 21:45:41.383520 """ from typing import Sequence, Union @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision: str = '0e804449c31d' -down_revision: Union[str, None] = None +revision: str = '4e3b4590920f' +down_revision: Union[str, None] = 'f7486e403e2f' branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -21,15 +21,12 @@ depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: """Upgrade schema.""" # ### commands auto generated by Alembic - please adjust! 
### - op.create_table('listing', - sa.Column('id', sa.Integer(), nullable=False), - sa.PrimaryKeyConstraint('id') - ) + op.add_column('buylisting', sa.Column('lease_left', sa.Integer(), nullable=True)) # ### end Alembic commands ### def downgrade() -> None: """Downgrade schema.""" # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('listing') + op.drop_column('buylisting', 'lease_left') # ### end Alembic commands ### diff --git a/crawler/alembic/versions/f7486e403e2f_add_more_fields_to_tables.py b/crawler/alembic/versions/f7486e403e2f_add_more_fields_to_tables.py new file mode 100644 index 0000000..66d85c9 --- /dev/null +++ b/crawler/alembic/versions/f7486e403e2f_add_more_fields_to_tables.py @@ -0,0 +1,81 @@ +"""add more fields to tables + +Revision ID: f7486e403e2f +Revises: +Create Date: 2025-06-04 20:54:13.838969 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import sqlmodel + + +# revision identifiers, used by Alembic. +revision: str = "f7486e403e2f" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "buylisting", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("price", sa.Float(), nullable=False), + sa.Column("number_of_bedrooms", sa.Integer(), nullable=False), + sa.Column("square_meters", sa.Float(), nullable=True), + sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("longtitude", sa.Float(), nullable=False), + sa.Column("latitude", sa.Float(), nullable=False), + sa.Column("price_history", sa.JSON(), nullable=False), + sa.Column( + "listing_site", sa.Enum("RIGHTMOVE", name="listingsite"), nullable=False + ), + sa.Column("last_seen", sa.DateTime(), nullable=False), + sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("service_charge", sa.Float(), nullable=True), + sa.Column( + "council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "rentlisting", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("price", sa.Float(), nullable=False), + sa.Column("number_of_bedrooms", sa.Integer(), nullable=False), + sa.Column("square_meters", sa.Float(), nullable=True), + sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column( + "council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True + ), + sa.Column("longtitude", sa.Float(), nullable=False), + sa.Column("latitude", sa.Float(), nullable=False), + sa.Column("price_history", sa.JSON(), nullable=False), + sa.Column( + "listing_site", sa.Enum("RIGHTMOVE", name="listingsite"), nullable=False + ), + sa.Column("last_seen", sa.DateTime(), nullable=False), + sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("available_from", sa.DateTime(), nullable=True), + sa.Column( + "furnish_type", + sa.Enum("FURNISHED", "UNFURNISHED", "PART_FURNISHED", name="furnishtype"), + nullable=False, + ), + sa.PrimaryKeyConstraint("id"), + ) + # ### end Alembic commands ### + 
+ +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("rentlisting") + op.drop_table("buylisting") + # ### end Alembic commands ### diff --git a/crawler/data_access.py b/crawler/data_access.py index a2a9838..42de8e1 100644 --- a/crawler/data_access.py +++ b/crawler/data_access.py @@ -4,6 +4,7 @@ from dataclasses import dataclass import json import pathlib from typing import Any, List, Dict +from models.listing import ListingSite from rec import floorplan, routing import re import datetime @@ -379,6 +380,18 @@ class Listing: with open(self.path_price_history(), "r") as f: return json.load(f) + @property + def longtitude(self) -> float: + return self.detailobject["property"]["longitude"] + + @property + def latitude(self) -> float: + return self.detailobject["property"]["latitude"] + + @property + def listing_site(self) -> ListingSite: + return ListingSite.RIGHTMOVE # this class supports only Rightmove + async def dict_nicely(self): travel_time_fastest = {} travel_time_second = {} diff --git a/crawler/database.py b/crawler/database.py index b92f677..cef774d 100644 --- a/crawler/database.py +++ b/crawler/database.py @@ -5,7 +5,7 @@ from sqlalchemy.orm import sessionmaker # DATABASE_URL = "postgresql://user:password@localhost/db_name" DATABASE_URL = "sqlite:///data/wrongmove.db" -engine = create_engine(DATABASE_URL, echo=True) # `echo=True` for debug logs +engine = create_engine(DATABASE_URL, echo=False) # set `echo=True` for debug logs SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) diff --git a/crawler/main.py b/crawler/main.py index e9b4dce..307944a 100644 --- a/crawler/main.py +++ b/crawler/main.py @@ -11,8 +11,10 @@ from data_access import Listing import csv_exporter from rec.query import ListingType, FurnishType, QueryParameters from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode +from repositories.listing_repositorty import ListingRepository from 
ui_exporter import export_immoweb as export_immoweb_ui from functools import wraps +from database import engine dump_listings_module = importlib.import_module("1_dump_listings") @@ -319,7 +321,7 @@ def export_csv( @listing_filter_options @click.pass_context def export_immoweb( - ctx, + ctx: click.core.Context, output_file: str, district: list[str], min_bedrooms: int, @@ -350,5 +352,19 @@ def export_immoweb( asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters)) +@cli.command() +@click.pass_context +def populate_db( + ctx: click.core.Context, +): + data_dir = ctx.obj["data_dir"] + click.echo(f"Populating the database with data from {data_dir}") + repository = ListingRepository(engine=engine) + listings = Listing.get_all_listings( + [str(path) for path in pathlib.Path(data_dir).glob("*/listing.json")] + ) + asyncio.run(repository.upsert_listings(listings)) + + if __name__ == "__main__": cli() diff --git a/crawler/models/listing.py b/crawler/models/listing.py index 3855a94..568f72e 100644 --- a/crawler/models/listing.py +++ b/crawler/models/listing.py @@ -37,6 +37,7 @@ class FurnishType(enum.StrEnum): FURNISHED = "furnished" UNFURNISHED = "unfurnished" PART_FURNISHED = "partFurnished" + UNKNOWN = "unknown" class RentListing(Listing, table=True):