trading/services/meet_kevin_watcher/pipeline.py
Viktor Barzin ed2195d879 feat(meet-kevin): throttle inter-video LLM calls (30s) to stay under Anthropic RPM
First production run hit Anthropic's per-account rate_limit_error (429) trying
to burn through 16 backfill videos in seconds. The SDK's built-in retry can't
recover because the rate limit window resets slower than the 3 retry attempts.

Added meet_kevin_inter_video_sleep_seconds (default 30s) to PipelineDeps and
main's _process_pending_videos loop. 16 backfill videos now take ~8 min (16 * 30s
sleeps + ~30s per LLM call) instead of bursting into the rate limit.
2026-05-22 20:25:19 +00:00

266 lines
10 KiB
Python

"""Meet Kevin pipeline orchestrator.
Contains the per-video state-machine (process_one_video) and the daily
cost accounting helper (daily_cost_used). Both are designed for
dependency injection so they are fully unit-testable without a real DB
or LLM backend.
Public exports:
PipelineDeps — frozen dataclass carrying all injected callables + config
process_one_video — advance one KevinVideo through its pipeline stages (a 'discovered' video continues into analysis within the same call)
daily_cost_used — sum today's LLM spend from kevin_analyses
"""
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from decimal import Decimal
from typing import Any, Callable, Coroutine
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from shared.models.meet_kevin import (
KevinAnalysis,
KevinStockMention,
KevinTranscript,
KevinVideo,
)
from services.meet_kevin_watcher.caption_extractor import CaptionResult
from services.meet_kevin_watcher.llm_analyzer import LlmCallResult
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Dependency-injection container
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class PipelineDeps:
    """Immutable bundle of the collaborators and settings used by the pipeline.

    Every external dependency is handed in through this container instead of
    being imported directly, so tests can swap the async callables for
    AsyncMock objects that honour the same signatures as the real
    implementations.
    """

    # Async callable: (video_id: str, workdir: str) -> CaptionResult | None
    extract_captions: Callable[..., Coroutine[Any, Any, CaptionResult | None]]

    # Async callable: (**kwargs) -> LlmCallResult
    analyze: Callable[..., Coroutine[Any, Any, LlmCallResult]]

    # Async callable: (session: AsyncSession) -> Decimal
    daily_cost_used: Callable[..., Coroutine[Any, Any, Decimal]]

    # LLM model identifier stored in kevin_analyses.model.
    model: str

    # Prompt version string stored in kevin_analyses.prompt_version.
    prompt_version: str

    # Hard ceiling for total LLM spend per calendar day (UTC).
    daily_cost_cap_usd: Decimal

    # Filesystem directory for yt-dlp caption downloads.
    workdir: str

    # Sleep between consecutive videos to stay under the LLM provider RPM limit.
    inter_video_sleep_seconds: int = 30
# ---------------------------------------------------------------------------
# Daily cost accounting
# ---------------------------------------------------------------------------
async def daily_cost_used(session: AsyncSession) -> Decimal:
    """Sum the LLM spend recorded in kevin_analyses since the start of today.

    Issues one aggregate query: SUM(cost_usd) over rows whose created_at is
    at or after ``date_trunc('day', now())``, with COALESCE supplying 0 when
    no row matches.

    NOTE(review): the day boundary is computed by the database, not in
    Python. It lines up with midnight UTC only if the database session's
    time zone is UTC — confirm against the deployment's DB settings.

    Args:
        session: Async SQLAlchemy session used to run the query.

    Returns:
        The summed cost_usd as a Decimal; Decimal("0") when nothing matched.
    """
    start_of_day = func.date_trunc("day", func.now())
    spend_total = func.coalesce(func.sum(KevinAnalysis.cost_usd), 0)
    query = select(spend_total).where(KevinAnalysis.created_at >= start_of_day)

    total = (await session.execute(query)).scalar_one()
    # Go through str() so a float/int coming back from the driver is not
    # converted to Decimal via its binary representation.
    return Decimal(str(total or 0))
# ---------------------------------------------------------------------------
# Per-video pipeline stage runner
# ---------------------------------------------------------------------------
# Number of failed LLM attempts after which a video is marked 'failed'.
_MAX_LLM_ATTEMPTS = 3


async def _run_caption_stage(
    video: KevinVideo,
    session: AsyncSession,
    deps: PipelineDeps,
) -> str:
    """Extract captions for a 'discovered' video and store them as a transcript.

    Returns 'captioned' on success, or 'failed' when the extractor yields None.
    """
    caption_result: CaptionResult | None = await deps.extract_captions(
        video.youtube_video_id, deps.workdir
    )
    if caption_result is None:
        logger.warning("No captions for video %s — marking failed", video.youtube_video_id)
        video.status = "failed"
        video.failure_reason = "no_captions"
        return "failed"

    # Only an explicit "manual" marker selects manual captions; every other
    # source value (e.g. "youtube", "captions_auto") is stored as auto captions.
    transcript_source = (
        "captions_manual" if "manual" in caption_result.source else "captions_auto"
    )
    session.add(
        KevinTranscript(
            video_id=video.id,
            source=transcript_source,
            language=caption_result.language,
            raw_text=caption_result.raw_text,
            segments_json=list(caption_result.segments),
            word_count=caption_result.word_count,
        )
    )
    # Write the row out before the analysis stage selects it in this session.
    await session.flush()

    video.status = "captioned"
    logger.info(
        "Captions extracted for video %s (%d words)",
        video.youtube_video_id,
        caption_result.word_count,
    )
    return "captioned"


async def _run_analysis_stage(
    video: KevinVideo,
    session: AsyncSession,
    deps: PipelineDeps,
) -> str:
    """Run the LLM analysis for a 'captioned' video and persist its results.

    Returns 'analyzed' on success, 'captioned' when the daily cost cap is
    reached or a retryable LLM error occurred, and 'failed' once the retry
    budget is exhausted.
    """
    # Check the daily cost cap before spending on another LLM call.
    cost_so_far: Decimal = await deps.daily_cost_used(session)
    if cost_so_far >= deps.daily_cost_cap_usd:
        logger.info(
            "Daily cost cap $%.4f reached ($%.4f used) — skipping LLM for %s",
            deps.daily_cost_cap_usd, cost_so_far, video.youtube_video_id,
        )
        return "captioned"

    # Load the stored transcript; scalar_one() raises unless exactly one row
    # exists for this video.
    result = await session.execute(
        select(KevinTranscript).where(KevinTranscript.video_id == video.id)
    )
    transcript = result.scalar_one()
    segments: list[dict] = transcript.segments_json or []

    try:
        llm_result: LlmCallResult = await deps.analyze(
            # Guard title the same way as description so a NULL column value
            # never reaches the analyzer as None.
            title=getattr(video, "title", "") or "",
            description=getattr(video, "description", "") or "",
            published_at=getattr(video, "published_at", None) or datetime.now(tz=timezone.utc),
            transcript_text=transcript.raw_text,
            transcript_segments=segments,
        )
    except Exception as exc:
        # Deliberately broad: any analyzer failure counts against the
        # per-video retry budget instead of propagating to the caller.
        video.retry_count = (video.retry_count or 0) + 1
        if video.retry_count >= _MAX_LLM_ATTEMPTS:
            video.status = "failed"
            video.failure_reason = f"llm_error: {type(exc).__name__}"
            # failure_reason keeps only the exception type, so log the
            # traceback here before it is lost.
            logger.error(
                "Video %s failed after %d retries: %s",
                video.youtube_video_id, video.retry_count, exc,
                exc_info=True,
            )
            return "failed"
        logger.warning(
            "LLM error for video %s (retry %d/%d): %s",
            video.youtube_video_id, video.retry_count, _MAX_LLM_ATTEMPTS, exc,
        )
        return "captioned"

    analysis = llm_result.analysis

    # Persist the KevinAnalysis row.
    db_analysis = KevinAnalysis(
        video_id=video.id,
        model=deps.model,
        prompt_version=deps.prompt_version,
        market_outlook_direction=analysis.market_outlook_direction.value,
        market_outlook_reasoning=analysis.market_outlook_reasoning,
        macro_themes_json=analysis.macro_themes,
        key_risks_json=analysis.key_risks,
        summary=analysis.summary,
        raw_response_json=llm_result.raw_response,
        prompt_tokens=llm_result.prompt_tokens,
        completion_tokens=llm_result.completion_tokens,
        cost_usd=llm_result.cost_usd,
    )
    session.add(db_analysis)
    await session.flush()  # populate db_analysis.id for the mention rows

    # Persist one KevinStockMention row per ticker in the analysis.
    for ticker in analysis.tickers:
        session.add(
            KevinStockMention(
                video_id=video.id,
                analysis_id=db_analysis.id,
                symbol=ticker.symbol,
                action=ticker.action.value,
                # str() first so a float conviction keeps its printed value.
                conviction=Decimal(str(ticker.conviction)),
                time_horizon=ticker.time_horizon.value,
                rationale_quote=ticker.rationale_quote,
                video_timestamp_seconds=ticker.video_timestamp_seconds,
            )
        )

    video.status = "analyzed"
    video.processed_at = datetime.now(tz=timezone.utc)
    logger.info(
        "Analysis complete for video %s: %s, %d tickers, cost=$%.4f",
        video.youtube_video_id,
        analysis.market_outlook_direction.value,
        len(analysis.tickers),
        llm_result.cost_usd,
    )
    return "analyzed"


async def process_one_video(
    video: KevinVideo,
    session: AsyncSession,
    deps: PipelineDeps,
) -> str:
    """Advance *video* through the pipeline and return the new status string.

    A 'discovered' video that is captioned successfully continues straight
    into the analysis stage within the same call; it does not stop after a
    single stage.

    Stage transitions:
        discovered → extract captions
            • None result   → status='failed', failure_reason='no_captions'
            • CaptionResult → insert KevinTranscript, advance to 'captioned',
                              then continue with the 'captioned' stage below
        captioned → check daily cost cap
            • at/over cap → leave as 'captioned' (retry tomorrow)
            • under cap   → call analyze()
                - success   → insert KevinAnalysis + KevinStockMention rows,
                              advance to 'analyzed', set processed_at
                - exception → increment retry_count;
                              if retry_count >= 3: status='failed',
                              failure_reason='llm_error: <ExceptionType>'
                              otherwise leave as 'captioned'
        any other status → logged as unexpected and returned unchanged

    Args:
        video: ORM instance (mutated in-place; caller is responsible for commit).
        session: Async SQLAlchemy session (add/flush, NOT commit — caller commits).
        deps: Injected callables and config.

    Returns:
        The new status string (e.g. "analyzed", "captioned", "failed").

    Raises:
        Exception: errors from caption extraction, the cost lookup or the
            database are not caught and propagate to the caller. This
            includes the error ``scalar_one()`` raises when a 'captioned'
            video does not have exactly one transcript row. Only failures
            of ``deps.analyze`` are absorbed into the retry accounting.
    """
    # status may be an Enum member or a plain string; normalise to its text.
    current_status: str = (
        str(video.status.value) if hasattr(video.status, "value") else str(video.status)
    )

    # ------------------------------------------------------------------
    # Stage 1: discovered → extract captions
    # ------------------------------------------------------------------
    if current_status == "discovered":
        current_status = await _run_caption_stage(video, session, deps)
        if current_status != "captioned":
            return current_status

    # ------------------------------------------------------------------
    # Stage 2: captioned → LLM analysis
    # ------------------------------------------------------------------
    if current_status == "captioned":
        return await _run_analysis_stage(video, session, deps)

    # Unknown status — log and return unchanged
    logger.warning(
        "process_one_video: unexpected status %r for video %s",
        current_status,
        video.youtube_video_id,
    )
    return current_status