migrate dump images command to use model listings
This commit is contained in:
parent
4f5a934fa9
commit
ba87d07cd2
6 changed files with 99 additions and 20 deletions
|
|
@ -65,7 +65,7 @@ async def dump_listings(
|
|||
for listing, detail in zip(listings_without_details, listing_details):
|
||||
listing._details_object = detail
|
||||
|
||||
model_listings = await repository.upsert_listings(listings) # upsert in db
|
||||
model_listings = await repository.upsert_listings_legacy(listings) # upsert in db
|
||||
await dump_listings_to_fs(listings)
|
||||
|
||||
return model_listings
|
||||
|
|
|
|||
|
|
@ -1,36 +1,45 @@
|
|||
import asyncio
|
||||
import json
|
||||
from pathlib import Path
|
||||
import aiohttp
|
||||
from repositories import ListingRepository
|
||||
from tqdm.asyncio import tqdm
|
||||
from data_access import Listing
|
||||
|
||||
# from data_access import Listing
|
||||
from models import Listing
|
||||
|
||||
# Setting this too high either crashes rightmove or gets us blocked
|
||||
semaphore = asyncio.Semaphore(10)
|
||||
|
||||
|
||||
async def dump_images(listing_paths: list[str]):
|
||||
listings = Listing.get_all_listings(listing_paths)
|
||||
await tqdm.gather(*[dump_images_for_listing(listing) for listing in listings])
|
||||
async def dump_images(repository: ListingRepository, image_base_path: Path):
|
||||
listings = await repository.get_listings()
|
||||
updated_listings = await tqdm.gather(
|
||||
*[dump_images_for_listing(listing, image_base_path) for listing in listings]
|
||||
)
|
||||
await repository.upsert_listings(
|
||||
[listing for listing in updated_listings if listing is not None]
|
||||
)
|
||||
|
||||
|
||||
async def dump_images_for_listing(listing: Listing):
|
||||
with open(listing.path_detail_json()) as f:
|
||||
detail = json.load(f)
|
||||
|
||||
for photo in detail["property"]["floorplans"]:
|
||||
url = photo["url"]
|
||||
async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None:
|
||||
all_floorplans = listing.additional_info["property"]["floorplans"]
|
||||
for floorplan in all_floorplans:
|
||||
url = floorplan["url"]
|
||||
picname = url.split("/")[-1]
|
||||
order = photo["order"]
|
||||
p = listing.path_floorplan_file(order, picname)
|
||||
if p.exists():
|
||||
floorplan_path = Path(base_path, str(listing.id), "floorplans", picname)
|
||||
if floorplan_path.exists():
|
||||
continue
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with semaphore:
|
||||
async with semaphore:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f"Error for {url}: {response.status}")
|
||||
with open(p, "wb") as f:
|
||||
floorplan_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(floorplan_path, "wb") as f:
|
||||
f.write(await response.read())
|
||||
listing.floorplan_image_paths.append(str(floorplan_path))
|
||||
return listing
|
||||
except Exception as e:
|
||||
tqdm.write(f"Error for {url}: {e}")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
"""add more fields to tables
|
||||
|
||||
Revision ID: 8a7accc583c9
|
||||
Revises: b2ffa638aafc
|
||||
Create Date: 2025-06-07 13:38:08.805386
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '8a7accc583c9'
|
||||
down_revision: Union[str, None] = 'b2ffa638aafc'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.add_column('buylisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
|
||||
op.add_column('rentlisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.drop_column('rentlisting', 'floorplan_image_paths')
|
||||
op.drop_column('buylisting', 'floorplan_image_paths')
|
||||
# ### end Alembic commands ###
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
"""add more fields to tables
|
||||
|
||||
Revision ID: b2ffa638aafc
|
||||
Revises: b78e1ed31eed
|
||||
Create Date: 2025-06-07 12:18:28.963851
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'b2ffa638aafc'
|
||||
down_revision: Union[str, None] = 'b78e1ed31eed'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
pass
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
pass
|
||||
# ### end Alembic commands ###
|
||||
|
|
@ -166,8 +166,8 @@ def dump_listings(
|
|||
def dump_images(ctx: click.core.Context):
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running dump_images stored in {data_dir}")
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
asyncio.run(dump_images_module.dump_images(listing_paths))
|
||||
repository = ListingRepository(engine=engine)
|
||||
asyncio.run(dump_images_module.dump_images(repository, image_base_path=data_dir))
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from sqlmodel import JSON, Column, Enum, SQLModel, Field
|
||||
from sqlmodel import JSON, Column, Enum, SQLModel, Field, String, TypeDecorator
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -31,6 +32,9 @@ class Listing(SQLModel, table=False):
|
|||
listing_site: ListingSite = Field(nullable=False)
|
||||
last_seen: datetime = Field(default_factory=datetime.now, nullable=False)
|
||||
photo_thumbnail: str | None = Field(default=None, nullable=True)
|
||||
floorplan_image_paths: List[str] = Field(
|
||||
default_factory=list, sa_type=JSON, nullable=False
|
||||
)
|
||||
additional_info: Dict[str, Any] = Field(
|
||||
default_factory=dict, sa_type=JSON, nullable=False
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue