"""Reddit source — fetches hot posts from financial subreddits via asyncpraw.""" import hashlib import logging from datetime import datetime, timezone from shared.schemas.news import RawArticle logger = logging.getLogger(__name__) class RedditSource: """Fetches hot posts from Reddit and converts them to :class:`RawArticle`.""" def __init__( self, subreddits: list[str], client_id: str, client_secret: str, user_agent: str, min_score: int = 10, ) -> None: self.subreddits = subreddits self.client_id = client_id self.client_secret = client_secret self.user_agent = user_agent self.min_score = min_score async def fetch(self) -> list[RawArticle]: """Return hot posts above *min_score* from each configured subreddit. Uses ``asyncpraw`` so the caller must run within an ``async`` context. Each Reddit instance is created and closed within this call to avoid leaking sessions across poll cycles. """ import asyncpraw # lazy import so the dep is optional at import time articles: list[RawArticle] = [] now = datetime.now(timezone.utc) reddit = asyncpraw.Reddit( client_id=self.client_id, client_secret=self.client_secret, user_agent=self.user_agent, ) try: for sub_name in self.subreddits: try: subreddit = await reddit.subreddit(sub_name) async for post in subreddit.hot(limit=25): if post.score < self.min_score: continue content = post.selftext if post.selftext else post.url permalink = post.permalink content_hash = hashlib.sha256(permalink.encode()).hexdigest() published_at = datetime.fromtimestamp(post.created_utc, tz=timezone.utc) articles.append( RawArticle( source="reddit", url=f"https://reddit.com{permalink}", title=post.title, content=content, published_at=published_at, fetched_at=now, content_hash=content_hash, ) ) except Exception: logger.exception("Failed to fetch subreddit r/%s", sub_name) continue finally: await reddit.close() return articles