I1: Add graceful shutdown (SIGTERM/SIGINT) to all 5 background services; I2: Fix Dockerfile healthcheck to use curl on /metrics endpoint; I3: Fix StreamConsumer.ensure_group() to only catch BUSYGROUP errors; I4: Fix SimulatedBroker to reject orders with insufficient cash/shares; I5: Move ORM attribute access inside DB session context in trades routes; I6: Add Redis-based rate limiting (10 req/min/IP) on all auth endpoints; I8: Prevent backtest background task garbage collection; I9: Use Numeric(16,6) instead of Float for financial columns in migration; I10: Add index on trades.created_at for time-range queries; I11: Bind infrastructure ports to 127.0.0.1 in docker-compose; I12: Add migrations init service; all app services depend on it; I13: Fix user enumeration in login_begin (return options for non-existent users)
175 lines
5.5 KiB
Python
175 lines
5.5 KiB
Python
"""News fetcher service entry point.
|
|
|
|
Polls RSS feeds and Reddit on independent schedules, deduplicates
|
|
articles by content hash (via a Redis SET), and publishes new articles
|
|
to the ``news:raw`` Redis Stream.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import signal
|
|
|
|
from redis.asyncio import Redis
|
|
|
|
from shared.redis_streams import StreamPublisher
|
|
from shared.telemetry import setup_telemetry
|
|
from services.news_fetcher.config import NewsFetcherConfig
|
|
from services.news_fetcher.sources.rss import RSSSource
|
|
from services.news_fetcher.sources.reddit import RedditSource
|
|
from shared.schemas.news import RawArticle
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
SEEN_HASHES_KEY = "news:seen_hashes"
|
|
NEWS_RAW_STREAM = "news:raw"
|
|
|
|
|
|
async def _deduplicate_and_publish(
    articles: list[RawArticle],
    redis: Redis,
    publisher: StreamPublisher,
    articles_fetched_counter,
    fetch_errors_counter,
) -> int:
    """Add unseen articles to the ``news:raw`` stream.

    Each article's ``content_hash`` is added to the Redis set
    ``SEEN_HASHES_KEY``; only articles whose hash was not already present
    are handed to *publisher*.

    If publishing an article fails, its hash is removed from the set again
    (best effort) so the article is not permanently treated as "seen", and
    the original exception is re-raised to the caller. Articles published
    before the failure are still counted on *articles_fetched_counter*.

    ``fetch_errors_counter`` is accepted for signature compatibility with
    the pollers; this function does not use it.

    Returns the number of newly published articles.
    """
    published = 0
    try:
        for article in articles:
            # SADD returns 1 if the member was added (i.e. not already present)
            added = await redis.sadd(SEEN_HASHES_KEY, article.content_hash)
            if not added:
                continue

            try:
                await publisher.publish(article.model_dump(mode="json"))
            except Exception:
                # The hash was recorded before the publish attempt; undo that
                # so a later poll can retry this article instead of skipping it.
                try:
                    await redis.srem(SEEN_HASHES_KEY, article.content_hash)
                except Exception:
                    logger.exception(
                        "Failed to roll back dedup marker for unpublished article"
                    )
                raise  # re-raises the publish error, not the rollback error

            published += 1
    finally:
        # Record what actually reached the stream, even on a partial batch.
        if published:
            articles_fetched_counter.add(published)
    return published
|
|
|
|
|
|
async def _poll_rss(
    source: RSSSource,
    interval: int,
    redis: Redis,
    publisher: StreamPublisher,
    articles_fetched_counter,
    fetch_errors_counter,
    shutdown_event: asyncio.Event,
) -> None:
    """Run the RSS polling loop until *shutdown_event* is set.

    Every cycle fetches from *source*, passes the result to
    ``_deduplicate_and_publish`` and then waits up to *interval* seconds on
    *shutdown_event*. A failing cycle is logged and counted on
    *fetch_errors_counter*; it does not terminate the loop.
    """
    while True:
        if shutdown_event.is_set():
            return

        try:
            logger.info("Polling RSS feeds …")
            fetched = await source.fetch()
            new_count = await _deduplicate_and_publish(
                fetched,
                redis,
                publisher,
                articles_fetched_counter,
                fetch_errors_counter,
            )
            logger.info("RSS poll complete: %d new articles published", new_count)
        except Exception:
            # Keep the poller alive: record the failure and try again next cycle.
            logger.exception("RSS poll cycle failed")
            fetch_errors_counter.add(1)

        # Wait out the interval, but wake up immediately on shutdown.
        try:
            await asyncio.wait_for(shutdown_event.wait(), timeout=interval)
        except asyncio.TimeoutError:
            continue  # Interval elapsed without a shutdown request
        else:
            return  # Shutdown was signaled while waiting
|
|
|
|
|
|
async def _poll_reddit(
    source: RedditSource,
    interval: int,
    redis: Redis,
    publisher: StreamPublisher,
    articles_fetched_counter,
    fetch_errors_counter,
    shutdown_event: asyncio.Event,
) -> None:
    """Run the Reddit polling loop until *shutdown_event* is set.

    Every cycle fetches from *source*, passes the result to
    ``_deduplicate_and_publish`` and then waits up to *interval* seconds on
    *shutdown_event*. A failing cycle is logged and counted on
    *fetch_errors_counter*; it does not terminate the loop.
    """
    keep_polling = not shutdown_event.is_set()
    while keep_polling:
        try:
            logger.info("Polling Reddit …")
            posts = await source.fetch()
            newly_published = await _deduplicate_and_publish(
                posts,
                redis,
                publisher,
                articles_fetched_counter,
                fetch_errors_counter,
            )
            logger.info(
                "Reddit poll complete: %d new articles published", newly_published
            )
        except Exception:
            # A bad cycle must not kill the poller; log, count, and carry on.
            logger.exception("Reddit poll cycle failed")
            fetch_errors_counter.add(1)

        # Interruptible sleep: returns early as soon as shutdown is requested.
        try:
            await asyncio.wait_for(shutdown_event.wait(), timeout=interval)
        except asyncio.TimeoutError:
            # Normal timeout — re-check the event and poll again.
            keep_polling = not shutdown_event.is_set()
        else:
            keep_polling = False  # Shutdown signaled
|
|
|
|
|
|
async def run() -> None:
    """Boot the news fetcher and poll all sources until shutdown.

    Builds the configuration, telemetry counters, Redis client, stream
    publisher and both sources, registers SIGTERM/SIGINT handlers that set a
    shared shutdown event, then runs the RSS and Reddit pollers concurrently
    in a task group.

    The Redis client is closed on every exit path once it has been created,
    including failures during source construction or signal-handler
    registration. The "stopped gracefully" message is logged only when the
    pollers finished without an exception.
    """
    config = NewsFetcherConfig()

    logging.basicConfig(level=config.log_level)
    logger.info("Starting news fetcher service")

    # Telemetry
    meter = setup_telemetry("news-fetcher", config.otel_metrics_port)
    articles_fetched_counter = meter.create_counter(
        "news.articles_fetched",
        description="Total articles fetched and published",
    )
    fetch_errors_counter = meter.create_counter(
        "news.fetch_errors",
        description="Total fetch-cycle errors",
    )

    # Redis
    redis = Redis.from_url(config.redis_url, decode_responses=True)

    stopped_cleanly = False
    try:
        publisher = StreamPublisher(redis, NEWS_RAW_STREAM)

        # Sources
        rss_source = RSSSource(feeds=config.rss_feeds)
        reddit_source = RedditSource(
            subreddits=config.reddit_subreddits,
            client_id=config.reddit_client_id,
            client_secret=config.reddit_client_secret,
            user_agent=config.reddit_user_agent,
            min_score=config.reddit_min_score,
        )

        # Graceful shutdown on SIGTERM/SIGINT
        shutdown_event = asyncio.Event()
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(sig, shutdown_event.set)

        # Run pollers concurrently
        async with asyncio.TaskGroup() as tg:
            tg.create_task(
                _poll_rss(
                    rss_source,
                    config.rss_poll_interval_seconds,
                    redis,
                    publisher,
                    articles_fetched_counter,
                    fetch_errors_counter,
                    shutdown_event,
                )
            )
            tg.create_task(
                _poll_reddit(
                    reddit_source,
                    config.reddit_poll_interval_seconds,
                    redis,
                    publisher,
                    articles_fetched_counter,
                    fetch_errors_counter,
                    shutdown_event,
                )
            )

        # Only reached when the task group exited without raising.
        stopped_cleanly = True
    finally:
        await redis.aclose()
        if stopped_cleanly:
            logger.info("News fetcher stopped gracefully")
        else:
            # Don't claim a graceful stop while an exception is propagating.
            logger.error("News fetcher stopped abnormally")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the service coroutine on a fresh event loop.
    asyncio.run(run())
|