"""Reddit source — fetches hot posts from financial subreddits via asyncpraw."""

import hashlib
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
|
|
from shared.schemas.news import RawArticle
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RedditSource:
    """Fetches hot posts from Reddit and converts them to :class:`RawArticle`.

    The source is configured once with a list of subreddit names and Reddit
    API credentials; every call to :meth:`fetch` opens its own client, walks
    the "hot" listing of each subreddit and returns the posts whose score is
    at least ``min_score``.
    """

    def __init__(
        self,
        subreddits: list[str],
        client_id: str,
        client_secret: str,
        user_agent: str,
        min_score: int = 10,
        post_limit: int = 25,
    ) -> None:
        """Store the configuration used by :meth:`fetch`.

        Args:
            subreddits: Names of the subreddits to poll (as passed to
                ``reddit.subreddit``).
            client_id: Reddit API client id handed to ``asyncpraw.Reddit``.
            client_secret: Reddit API client secret handed to
                ``asyncpraw.Reddit``.
            user_agent: User-agent string handed to ``asyncpraw.Reddit``.
            min_score: Posts with a score below this value are skipped.
            post_limit: Maximum number of posts requested from each
                subreddit's hot listing. Defaults to 25, the value that was
                previously hard-coded.
        """
        self.subreddits = subreddits
        self.client_id = client_id
        self.client_secret = client_secret
        self.user_agent = user_agent
        self.min_score = min_score
        self.post_limit = post_limit

    async def fetch(self) -> list[RawArticle]:
        """Return hot posts above *min_score* from each configured subreddit.

        Uses ``asyncpraw`` so the caller must run within an ``async`` context.
        Each Reddit instance is created and closed within this call to avoid
        leaking sessions across poll cycles.

        A failure while reading one subreddit is logged and does not abort
        the others; articles collected before the failure are kept.

        Returns:
            One :class:`RawArticle` per qualifying post, in the order the
            subreddits were configured and the posts were listed.
        """
        import asyncpraw  # lazy import so the dep is optional at import time

        articles: list[RawArticle] = []
        # One timestamp for the whole poll so every article of this cycle
        # carries the same ``fetched_at``.
        fetched_at = datetime.now(timezone.utc)

        reddit = asyncpraw.Reddit(
            client_id=self.client_id,
            client_secret=self.client_secret,
            user_agent=self.user_agent,
        )
        try:
            for sub_name in self.subreddits:
                try:
                    subreddit = await reddit.subreddit(sub_name)
                    async for post in subreddit.hot(limit=self.post_limit):
                        if post.score < self.min_score:
                            continue
                        articles.append(self._to_article(post, fetched_at))
                except Exception:
                    # Best-effort polling: log with traceback and move on to
                    # the next subreddit instead of failing the whole fetch.
                    logger.exception("Failed to fetch subreddit r/%s", sub_name)
        finally:
            # Always release the client, even if a subreddit loop blew up.
            await reddit.close()

        return articles

    @staticmethod
    def _to_article(post, fetched_at: datetime) -> RawArticle:
        """Convert one Reddit post object into a :class:`RawArticle`.

        Args:
            post: Post object yielded by the subreddit listing; its
                ``selftext``, ``url``, ``permalink``, ``created_utc`` and
                ``title`` attributes are read.
            fetched_at: Timestamp recorded as the article's ``fetched_at``.
        """
        permalink = post.permalink
        return RawArticle(
            source="reddit",
            url=f"https://reddit.com{permalink}",
            title=post.title,
            # Posts with an empty selftext fall back to the post's URL.
            content=post.selftext or post.url,
            published_at=datetime.fromtimestamp(post.created_utc, tz=timezone.utc),
            fetched_at=fetched_at,
            # The hash is derived from the permalink, so it identifies the
            # post itself rather than its (editable) text.
            content_hash=hashlib.sha256(permalink.encode()).hexdigest(),
        )
|