feat: add Meet Kevin pydantic schemas (analysis + API shapes)

2026-05-21 19:06:04 +00:00 · 2026-05-21 19:06:04 +00:00 · 75534de71b
commit 75534de71b
parent 8f616e6487
2 changed files with 512 additions and 0 deletions
--- a/shared/schemas/meet_kevin.py
+++ b/shared/schemas/meet_kevin.py
@ -0,0 +1,318 @@
+"""Meet Kevin pipeline Pydantic schemas.
+
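+Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis),
+API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
+StockMention, StockTimeline, TimelineBucket, PipelineHealth), and string enums
+(TickerAction, TimeHorizon, MarketOutlook, VideoStatus, TranscriptSource) whose
+values mirror the Literal value sets used by the model fields.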
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Literal
+
+from pydantic import BaseModel, Field, field_validator
+
+
+# =============================================================================
+# Enums
+# =============================================================================
+
+
+class TickerAction(str, Enum):
+    """Action recommendation for a stock ticker."""
+
+    # Mixes in str, so every member compares equal to its plain string value.
+    # The values are the same set as the Literal[...] used by the `action` /
+    # `latest_action` fields of the models in this module.
+    BUY = "buy"
+    SELL = "sell"
+    HOLD = "hold"
+    WATCH = "watch"
+    AVOID = "avoid"
+
+
+class TimeHorizon(str, Enum):
+    """Time horizon for an investment recommendation."""
+
+    # Mixes in str, so every member compares equal to its plain string value.
+    # The values are the same set as the Literal[...] used by the
+    # `time_horizon` fields of the models in this module.
+    INTRADAY = "intraday"
+    DAYS = "days"
+    WEEKS = "weeks"
+    MONTHS = "months"
+    LONG_TERM = "long_term"
+    # Explicit "no horizon given" value, distinct from a missing field.
+    UNSPECIFIED = "unspecified"
+
+
+class MarketOutlook(str, Enum):
+    """Overall market sentiment direction."""
+
+    # Mixes in str, so every member compares equal to its plain string value.
+    # The values are the same set as the Literal[...] used by
+    # MeetKevinAnalysis.market_outlook_direction.
+    BULLISH = "bullish"
+    NEUTRAL = "neutral"
+    BEARISH = "bearish"
+    MIXED = "mixed"
+
+
+class VideoStatus(str, Enum):
+    """Status of a video in the processing pipeline."""
+
+    # Mixes in str, so every member compares equal to its plain string value.
+    # The values are the same set as the Literal[...] used by the `status`
+    # fields of VideoSummary and VideoDetail.
+    DISCOVERED = "discovered"
+    CAPTIONED = "captioned"
+    ANALYZED = "analyzed"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+class TranscriptSource(str, Enum):
+    """Source of transcript captions."""
+
+    # Mixes in str, so every member compares equal to its plain string value.
+    # The values are the same set as the Literal[...] used by
+    # VideoDetail.transcript_source.
+    CAPTIONS_MANUAL = "captions_manual"
+    CAPTIONS_AUTO = "captions_auto"
+    # The string "none"; VideoDetail.transcript_source separately allows a
+    # null (None) value as well.
+    NONE = "none"
+
+
+# =============================================================================
+# LLM Tool-Input Schemas
+# =============================================================================
+
+
+class MeetKevinTickerMention(BaseModel):
+    """Single stock ticker mention extracted by Claude from a video transcript.
+
+    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.
+    """
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Normalized by `uppercase_symbol` below (trimmed, then uppercased).
+    symbol: str = Field(
+        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
+    )
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Recommendation action"
+    )
+    # Constrained to the closed interval [0.0, 1.0].
+    conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
+    )
+    time_horizon: Literal[
+        "intraday", "days", "weeks", "months", "long_term", "unspecified"
+    ] = Field(..., description="Time horizon for the recommendation")
+    rationale_quote: str = Field(
+        ..., description="Short verbatim or paraphrased quote from video"
+    )
+    # Optional; defaults to None when no timestamp is supplied.
+    video_timestamp_seconds: int | None = Field(
+        default=None, description="Timestamp for deep-link target"
+    )
+
+    @field_validator("symbol")
+    @classmethod
+    def uppercase_symbol(cls, v: str) -> str:
+        """Normalize the ticker symbol.
+
+        Surrounding whitespace is trimmed before uppercasing so that inputs
+        such as " aapl" and "AAPL" produce the same symbol instead of two
+        distinct values.
+
+        NOTE(review): the field description advertises "A-Z, 1-6 chars", but
+        that format is not enforced here. Confirm whether out-of-format
+        symbols (for example ones containing "." or "-") should be rejected.
+        """
+        return v.strip().upper()
+
+
+class MeetKevinAnalysis(BaseModel):
+    """Complete analysis output from Claude for a single video transcript.
+
+    Used as tool-input for the LLM analyzer and persisted as kevin_analyses.
+    """
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Market outlook: both parts are required.
+    market_outlook_direction: Literal[
+        "bullish", "neutral", "bearish", "mixed"
+    ] = Field(
+        ...,
+        description="Overall market sentiment direction",
+    )
+    market_outlook_reasoning: str = Field(
+        ...,
+        description="Explanation of market outlook",
+    )
+
+    # Optional lists: default_factory gives each instance its own empty list.
+    macro_themes: list[str] = Field(
+        description="Macro economic themes discussed",
+        default_factory=list,
+    )
+    key_risks: list[str] = Field(
+        description="Key risks identified",
+        default_factory=list,
+    )
+
+    # Required free-text summary.
+    summary: str = Field(
+        ...,
+        description="~200-word summary of analysis",
+    )
+
+    # Per-ticker mentions; empty list when none are supplied.
+    tickers: list[MeetKevinTickerMention] = Field(
+        description="List of ticker mentions",
+        default_factory=list,
+    )
+
+
+# =============================================================================
+# API Response Schemas
+# =============================================================================
+
+
+class TranscriptSegment(BaseModel):
+    """Single segment from a video transcript with timing."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # All three fields are required. No ordering between start and end is
+    # checked by this model.
+    start_seconds: float = Field(
+        ...,
+        description="Segment start time in seconds",
+    )
+    end_seconds: float = Field(
+        ...,
+        description="Segment end time in seconds",
+    )
+    text: str = Field(
+        ...,
+        description="Segment text content",
+    )
+
+
+class VideoSummary(BaseModel):
+    """Summary view of a video in the feed."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Required identity and display fields.
+    id: int = Field(
+        ...,
+        description="Database ID",
+    )
+    youtube_video_id: str = Field(
+        ...,
+        description="YouTube video ID",
+    )
+    title: str = Field(
+        ...,
+        description="Video title",
+    )
+    published_at: datetime = Field(
+        ...,
+        description="Publication timestamp",
+    )
+    thumbnail_url: str = Field(
+        ...,
+        description="Thumbnail image URL",
+    )
+
+    # Pipeline state; failure_reason stays None unless one is supplied.
+    status: Literal[
+        "discovered", "captioned", "analyzed", "failed", "skipped"
+    ] = Field(..., description="Processing status")
+    failure_reason: str | None = Field(
+        description="Failure reason if status=failed",
+        default=None,
+    )
+
+    # Defaults to 0 when not provided.
+    ticker_count: int = Field(
+        description="Number of ticker mentions analyzed",
+        default=0,
+    )
+
+
+class VideoDetail(BaseModel):
+    """Full details of a single video including analysis."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Video metadata; description and duration_seconds are optional.
+    id: int = Field(
+        ...,
+        description="Database ID",
+    )
+    youtube_video_id: str = Field(
+        ...,
+        description="YouTube video ID",
+    )
+    title: str = Field(
+        ...,
+        description="Video title",
+    )
+    description: str | None = Field(
+        description="Video description",
+        default=None,
+    )
+    published_at: datetime = Field(
+        ...,
+        description="Publication timestamp",
+    )
+    duration_seconds: int | None = Field(
+        description="Video duration",
+        default=None,
+    )
+    thumbnail_url: str = Field(
+        ...,
+        description="Thumbnail image URL",
+    )
+
+    # Pipeline state; failure_reason stays None unless one is supplied.
+    status: Literal[
+        "discovered", "captioned", "analyzed", "failed", "skipped"
+    ] = Field(..., description="Processing status")
+    failure_reason: str | None = Field(
+        description="Failure reason if status=failed",
+        default=None,
+    )
+
+    # Transcript data. transcript_source accepts the string "none" and also
+    # a null value (its default).
+    transcript_source: (
+        Literal["captions_manual", "captions_auto", "none"] | None
+    ) = Field(default=None, description="Source of captions")
+    transcript_segments: list[TranscriptSegment] = Field(
+        description="Transcript segments with timing",
+        default_factory=list,
+    )
+    transcript_raw: str | None = Field(
+        description="Full raw transcript text",
+        default=None,
+    )
+
+    # Nested LLM analysis; None when no analysis is attached.
+    analysis: MeetKevinAnalysis | None = Field(
+        description="LLM analysis if status=analyzed",
+        default=None,
+    )
+
+
+class StockMention(BaseModel):
+    """Single mention of a stock ticker in a video."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Which video the mention came from. This model carries no symbol field.
+    video_id: int = Field(
+        ...,
+        description="Database ID of video",
+    )
+    youtube_video_id: str = Field(
+        ...,
+        description="YouTube video ID for linking",
+    )
+    published_at: datetime = Field(
+        ...,
+        description="Video publication date",
+    )
+
+    # The recommendation itself; conviction is constrained to [0.0, 1.0].
+    action: Literal[
+        "buy", "sell", "hold", "watch", "avoid"
+    ] = Field(..., description="Recommendation action")
+    conviction: float = Field(
+        ...,
+        description="Confidence in recommendation",
+        ge=0.0,
+        le=1.0,
+    )
+    time_horizon: Literal[
+        "intraday",
+        "days",
+        "weeks",
+        "months",
+        "long_term",
+        "unspecified",
+    ] = Field(..., description="Time horizon for recommendation")
+    rationale_quote: str = Field(
+        ...,
+        description="Quote or summary of rationale",
+    )
+
+    # Optional; defaults to None when no timestamp is supplied.
+    video_timestamp_seconds: int | None = Field(
+        description="Deep-link timestamp",
+        default=None,
+    )
+
+
+class StockSummary(BaseModel):
+    """Summary of a stock across all mentions."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Required aggregate fields; avg_conviction is constrained to [0.0, 1.0].
+    symbol: str = Field(
+        ...,
+        description="Stock ticker",
+    )
+    mention_count: int = Field(
+        ...,
+        description="Total mention count",
+    )
+    last_mentioned_at: datetime = Field(
+        ...,
+        description="Timestamp of last mention",
+    )
+    latest_action: Literal[
+        "buy", "sell", "hold", "watch", "avoid"
+    ] = Field(..., description="Most recent recommendation")
+    avg_conviction: float = Field(
+        ...,
+        description="Average conviction across mentions",
+        ge=0.0,
+        le=1.0,
+    )
+
+    # Per-sentiment tallies, each defaulting to 0. The grouping of actions
+    # into each tally is as stated in the field descriptions.
+    bullish_count: int = Field(description="Buy + watch count", default=0)
+    bearish_count: int = Field(description="Sell + avoid count", default=0)
+    neutral_count: int = Field(description="Hold count", default=0)
+
+
+class TimelineBucket(BaseModel):
+    """Single time bucket in a sentiment timeline."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Bucket label kept as a plain string; its format is not validated here.
+    bucket_date: str = Field(
+        ...,
+        description="Date string (YYYY-MM-DD or YYYY-Www)",
+    )
+
+    # Aggregates for the bucket. When omitted: action is None,
+    # avg_conviction is 0.0 (constrained to [0.0, 1.0]), mention_count is 0.
+    action: (
+        Literal["buy", "sell", "hold", "watch", "avoid"] | None
+    ) = Field(default=None, description="Most common action in bucket")
+    avg_conviction: float = Field(
+        description="Average conviction",
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+    )
+    mention_count: int = Field(
+        description="Count of mentions in bucket",
+        default=0,
+    )
+
+
+class StockTimeline(BaseModel):
+    """Timeline of mentions for a single stock ticker."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # The ticker this timeline belongs to (required).
+    symbol: str = Field(
+        ...,
+        description="Stock ticker",
+    )
+
+    # Both lists default to a fresh empty list per instance. This model does
+    # not sort `mentions`; the ordering named in the description is up to the
+    # producer of the data.
+    buckets: list[TimelineBucket] = Field(
+        description="Time-bucketed data",
+        default_factory=list,
+    )
+    mentions: list[StockMention] = Field(
+        description="Chronological mentions (newest first)",
+        default_factory=list,
+    )
+
+
+class PipelineHealth(BaseModel):
+    """Health status of the Meet Kevin pipeline."""
+
+    # Allow validation from attribute-bearing objects as well as from dicts.
+    model_config = {"from_attributes": True}
+
+    # Polling freshness; both are None when not supplied.
+    last_poll_at: datetime | None = Field(
+        description="Timestamp of last RSS poll",
+        default=None,
+    )
+    last_poll_age_seconds: int | None = Field(
+        description="Seconds since last poll",
+        default=None,
+    )
+
+    # Throughput counters, each defaulting to 0.
+    # NOTE(review): the field names say "today" while the first description
+    # says "last 24h" -- confirm which window the producer actually uses.
+    videos_discovered_today: int = Field(
+        description="Videos found in last 24h",
+        default=0,
+    )
+    videos_captioned_today: int = Field(
+        description="Videos with captions processed",
+        default=0,
+    )
+    videos_analyzed_today: int = Field(
+        description="Videos analyzed with LLM",
+        default=0,
+    )
+
+    # LLM spend tracking. The cap defaults to 5.0 here; this model does not
+    # derive cost_capped from the two amounts, it is supplied by the caller.
+    llm_cost_today_usd: float = Field(
+        description="Total LLM cost today",
+        default=0.0,
+    )
+    daily_cost_cap_usd: float = Field(
+        description="Daily cost limit",
+        default=5.0,
+    )
+    cost_capped: bool = Field(
+        description="True if cost cap hit today",
+        default=False,
+    )
+
+    # Overall verdict plus optional free-text detail.
+    pipeline_status: Literal[
+        "ok", "warning", "error"
+    ] = Field(default="ok", description="Overall health status")
+    status_message: str | None = Field(
+        description="Optional status details",
+        default=None,
+    )