41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
"""Async PRAW wrapper — yields `RawPost` from a subreddit's top listing.

We use asyncpraw because the rest of the pipeline is asyncio-native and
we want to fan out across 12 subs concurrently via `asyncio.gather`.
"""
|
|
from __future__ import annotations

import logging
from collections.abc import AsyncIterator
from datetime import date, datetime, timezone
from typing import Any, Literal

from fire_planner.examples.models import RawPost
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Values accepted for the `when` argument of `fetch_top`, which forwards it
# as `time_filter` to the subreddit's top listing.
TopWhen = Literal["all", "year", "month", "week", "day"]
# Prefix joined with a submission's `permalink` to build `RawPost.url`.
REDDIT_BASE = "https://www.reddit.com"
|
|
|
|
|
|
async def fetch_top(
    reddit: Any,  # asyncpraw.Reddit
    subreddit: str,
    when: TopWhen,
    limit: int = 1000,
) -> AsyncIterator[RawPost]:
    """Stream the top submissions of `r/{subreddit}` as `RawPost` objects.

    Args:
        reddit: Client whose awaitable `subreddit(name)` returns an object
            with an async-iterable `top(...)` listing.
        subreddit: Subreddit name; also recorded as each post's source.
        when: Time window, forwarded to the listing as `time_filter`.
        limit: Maximum number of submissions requested from the listing
            (defaults to 1000, the PRAW cap).

    Yields:
        One `RawPost` per submission, in the order the listing produces them.
    """
    handle = await reddit.subreddit(subreddit)
    listing = handle.top(time_filter=when, limit=limit)
    async for post in listing:
        yield _to_raw_post(post, subreddit)
|
|
|
|
|
|
def _to_raw_post(submission: Any, source_sub: str) -> RawPost:
    """Convert a listing submission into a `RawPost`.

    Args:
        submission: Object exposing `id`, `permalink`, `title`, `selftext`
            and `created_utc` (presumably an asyncpraw `Submission`).
        source_sub: Subreddit name stored on the resulting post.

    Returns:
        A `RawPost` whose `url` is `REDDIT_BASE` joined with the permalink,
        whose `title`/`body` fall back to `""` when falsy, and whose
        `created_at` is the submission's creation date in UTC.
    """
    # `created_utc` is an epoch timestamp. `date.fromtimestamp` would convert
    # it using the host's local timezone, so the date could be off by one
    # near midnight UTC; convert with an explicit UTC tz instead.
    created = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
    return RawPost(
        reddit_id=submission.id,
        source_sub=source_sub,
        url=f"{REDDIT_BASE}{submission.permalink}",
        title=submission.title or "",
        body=submission.selftext or "",
        created_at=created.date(),
    )
|