feat: add Meet Kevin pydantic schemas (analysis + API shapes)

2026-05-21 19:06:04 +00:00 · 2026-05-21 19:06:04 +00:00 · 75534de71b
commit 75534de71b
parent 8f616e6487
2 changed files with 512 additions and 0 deletions
--- a/shared/schemas/meet_kevin.py
+++ b/shared/schemas/meet_kevin.py
@ -0,0 +1,318 @@
+"""Meet Kevin pipeline Pydantic schemas.
+
+Includes enums (TickerAction, TimeHorizon, MarketOutlook, VideoStatus,
+TranscriptSource), LLM tool-input schemas (MeetKevinTickerMention,
+MeetKevinAnalysis), and API response shapes (TranscriptSegment, VideoSummary,
+VideoDetail, StockSummary, StockMention, StockTimeline, TimelineBucket,
+PipelineHealth).
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Literal
+
+from pydantic import BaseModel, Field, field_validator
+
+
+# =============================================================================
+# Enums
+# =============================================================================
+
+
+class TickerAction(str, Enum):
+    """Action recommendation for a stock ticker.
+
+    Mixes in ``str``, so each member compares equal to its lowercase value
+    (e.g. ``TickerAction.BUY == "buy"``).
+
+    NOTE(review): the models in this module spell these same values inline
+    as ``Literal[...]`` on their ``action`` / ``latest_action`` fields rather
+    than referencing this enum; the two lists must be kept in sync by hand.
+    """
+
+    BUY = "buy"
+    SELL = "sell"
+    HOLD = "hold"
+    WATCH = "watch"
+    AVOID = "avoid"
+
+
+class TimeHorizon(str, Enum):
+    """Time horizon for an investment recommendation.
+
+    Mixes in ``str``, so each member compares equal to its lowercase value.
+    ``UNSPECIFIED`` is an explicit member rather than ``None``.
+
+    NOTE(review): the ``time_horizon`` fields in this module repeat these
+    values as ``Literal[...]`` instead of using this enum; keep them in sync.
+    """
+
+    INTRADAY = "intraday"
+    DAYS = "days"
+    WEEKS = "weeks"
+    MONTHS = "months"
+    LONG_TERM = "long_term"
+    UNSPECIFIED = "unspecified"
+
+
+class MarketOutlook(str, Enum):
+    """Overall market sentiment direction.
+
+    Mixes in ``str``, so each member compares equal to its lowercase value.
+
+    NOTE(review): ``MeetKevinAnalysis.market_outlook_direction`` repeats these
+    values as a ``Literal[...]`` instead of using this enum; keep them in sync.
+    """
+
+    BULLISH = "bullish"
+    NEUTRAL = "neutral"
+    BEARISH = "bearish"
+    MIXED = "mixed"
+
+
+class VideoStatus(str, Enum):
+    """Status of a video in the processing pipeline.
+
+    Mixes in ``str``, so each member compares equal to its lowercase value.
+
+    NOTE(review): ``VideoSummary.status`` and ``VideoDetail.status`` repeat
+    these values as ``Literal[...]`` instead of using this enum; keep them in
+    sync.
+    """
+
+    DISCOVERED = "discovered"
+    CAPTIONED = "captioned"
+    ANALYZED = "analyzed"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+class TranscriptSource(str, Enum):
+    """Source of transcript captions.
+
+    Mixes in ``str``, so each member compares equal to its lowercase value.
+    ``NONE`` is the string ``"none"``, which is distinct from Python ``None``.
+
+    NOTE(review): ``VideoDetail.transcript_source`` repeats these values as a
+    ``Literal[...]`` instead of using this enum; keep them in sync.
+    """
+
+    CAPTIONS_MANUAL = "captions_manual"
+    CAPTIONS_AUTO = "captions_auto"
+    NONE = "none"
+
+
+# =============================================================================
+# LLM Tool-Input Schemas
+# =============================================================================
+
+
+class MeetKevinTickerMention(BaseModel):
+    """Single stock ticker mention extracted by Claude from a video transcript.
+
+    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.
+    """
+
+    symbol: str = Field(
+        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
+    )
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Recommendation action"
+    )
+    conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
+    )
+    time_horizon: Literal[
+        "intraday", "days", "weeks", "months", "long_term", "unspecified"
+    ] = Field(..., description="Time horizon for the recommendation")
+    rationale_quote: str = Field(
+        ..., description="Short verbatim or paraphrased quote from video"
+    )
+    video_timestamp_seconds: int | None = Field(
+        default=None, description="Timestamp for deep-link target"
+    )
+
+    @field_validator("symbol")
+    @classmethod
+    def uppercase_symbol(cls, v: str) -> str:
+        """Auto-uppercase the ticker symbol."""
+        return v.upper()
+
+    model_config = {"from_attributes": True}
+
+
+class MeetKevinAnalysis(BaseModel):
+    """Complete analysis output from Claude for a single video transcript.
+
+    Used as tool-input for the LLM analyzer and persisted as kevin_analyses.
+
+    Required fields: ``market_outlook_direction``, ``market_outlook_reasoning``
+    and ``summary``. The list fields (``macro_themes``, ``key_risks``,
+    ``tickers``) each default to a fresh empty list.
+
+    NOTE(review): the "~200-word" length mentioned for ``summary`` is only a
+    description; no length constraint is declared on the field.
+    """
+
+    market_outlook_direction: Literal["bullish", "neutral", "bearish", "mixed"] = (
+        Field(..., description="Overall market sentiment direction")
+    )
+    market_outlook_reasoning: str = Field(
+        ..., description="Explanation of market outlook"
+    )
+    macro_themes: list[str] = Field(
+        default_factory=list, description="Macro economic themes discussed"
+    )
+    key_risks: list[str] = Field(
+        default_factory=list, description="Key risks identified"
+    )
+    summary: str = Field(..., description="~200-word summary of analysis")
+    # Each entry is validated as a MeetKevinTickerMention.
+    tickers: list[MeetKevinTickerMention] = Field(
+        default_factory=list, description="List of ticker mentions"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+# =============================================================================
+# API Response Schemas
+# =============================================================================
+
+
+class TranscriptSegment(BaseModel):
+    """Single segment from a video transcript with timing.
+
+    All three fields are required. Times are floats expressed in seconds.
+
+    NOTE(review): nothing here enforces ``end_seconds >= start_seconds`` or
+    non-negative times; callers must supply sane values.
+    """
+
+    start_seconds: float = Field(..., description="Segment start time in seconds")
+    end_seconds: float = Field(..., description="Segment end time in seconds")
+    text: str = Field(..., description="Segment text content")
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class VideoSummary(BaseModel):
+    """Summary view of a video in the feed.
+
+    Carries identity, title, publication time, thumbnail and processing
+    status. ``failure_reason`` defaults to ``None`` and ``ticker_count``
+    defaults to 0.
+
+    NOTE(review): ``failure_reason`` is described as applying when
+    ``status == "failed"``, but no validator ties the two fields together.
+    """
+
+    id: int = Field(..., description="Database ID")
+    youtube_video_id: str = Field(..., description="YouTube video ID")
+    title: str = Field(..., description="Video title")
+    published_at: datetime = Field(..., description="Publication timestamp")
+    thumbnail_url: str = Field(..., description="Thumbnail image URL")
+    # Same value set as the VideoStatus enum, spelled inline.
+    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = (
+        Field(..., description="Processing status")
+    )
+    failure_reason: str | None = Field(
+        default=None, description="Failure reason if status=failed"
+    )
+    ticker_count: int = Field(
+        default=0, description="Number of ticker mentions analyzed"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class VideoDetail(BaseModel):
+    """Full details of a single video including analysis.
+
+    Shares ``id``, ``youtube_video_id``, ``title``, ``published_at``,
+    ``thumbnail_url``, ``status`` and ``failure_reason`` with ``VideoSummary``
+    and adds description, duration, transcript data and the optional nested
+    ``MeetKevinAnalysis``.
+
+    NOTE(review): ``failure_reason`` and ``analysis`` are described as
+    depending on ``status``, but no validator enforces that relationship.
+    """
+
+    id: int = Field(..., description="Database ID")
+    youtube_video_id: str = Field(..., description="YouTube video ID")
+    title: str = Field(..., description="Video title")
+    description: str | None = Field(default=None, description="Video description")
+    published_at: datetime = Field(..., description="Publication timestamp")
+    duration_seconds: int | None = Field(default=None, description="Video duration")
+    thumbnail_url: str = Field(..., description="Thumbnail image URL")
+    # Same value set as the VideoStatus enum, spelled inline.
+    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = (
+        Field(..., description="Processing status")
+    )
+    failure_reason: str | None = Field(
+        default=None, description="Failure reason if status=failed"
+    )
+    # Same value set as the TranscriptSource enum; Python None (field unset)
+    # is distinct from the literal string "none".
+    transcript_source: Literal["captions_manual", "captions_auto", "none"] | None = (
+        Field(default=None, description="Source of captions")
+    )
+    transcript_segments: list[TranscriptSegment] = Field(
+        default_factory=list, description="Transcript segments with timing"
+    )
+    transcript_raw: str | None = Field(
+        default=None, description="Full raw transcript text"
+    )
+    analysis: MeetKevinAnalysis | None = Field(
+        default=None, description="LLM analysis if status=analyzed"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class StockMention(BaseModel):
+    """Single mention of a stock ticker in a video.
+
+    API-facing counterpart of ``MeetKevinTickerMention``: it repeats the
+    ``action``, ``conviction``, ``time_horizon``, ``rationale_quote`` and
+    ``video_timestamp_seconds`` fields, omits ``symbol``, and adds the video's
+    database ID, YouTube ID and publication time.
+    """
+
+    video_id: int = Field(..., description="Database ID of video")
+    youtube_video_id: str = Field(..., description="YouTube video ID for linking")
+    published_at: datetime = Field(..., description="Video publication date")
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Recommendation action"
+    )
+    # Constrained to the closed interval [0.0, 1.0].
+    conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in recommendation"
+    )
+    time_horizon: Literal[
+        "intraday", "days", "weeks", "months", "long_term", "unspecified"
+    ] = Field(..., description="Time horizon for recommendation")
+    rationale_quote: str = Field(
+        ..., description="Quote or summary of rationale"
+    )
+    video_timestamp_seconds: int | None = Field(
+        default=None, description="Deep-link timestamp"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class StockSummary(BaseModel):
+    """Summary of a stock across all mentions.
+
+    ``symbol``, ``mention_count``, ``last_mentioned_at``, ``latest_action``
+    and ``avg_conviction`` are required; the three sentiment counters default
+    to 0.
+
+    NOTE(review): per the field descriptions, "watch" is counted as bullish
+    and "avoid" as bearish. The model does not check that the three counters
+    add up to ``mention_count``, and ``symbol`` is not normalized here.
+    """
+
+    symbol: str = Field(..., description="Stock ticker")
+    mention_count: int = Field(..., description="Total mention count")
+    last_mentioned_at: datetime = Field(
+        ..., description="Timestamp of last mention"
+    )
+    latest_action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Most recent recommendation"
+    )
+    # Constrained to the closed interval [0.0, 1.0].
+    avg_conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Average conviction across mentions"
+    )
+    bullish_count: int = Field(
+        default=0, description="Buy + watch count"
+    )
+    bearish_count: int = Field(
+        default=0, description="Sell + avoid count"
+    )
+    neutral_count: int = Field(
+        default=0, description="Hold count"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class TimelineBucket(BaseModel):
+    """Single time bucket in a sentiment timeline.
+
+    Only ``bucket_date`` is required. A bucket built with defaults has
+    ``action=None``, ``avg_conviction=0.0`` and ``mention_count=0``.
+
+    NOTE(review): ``bucket_date`` is a plain ``str``; the daily/weekly formats
+    named in its description are not validated.
+    """
+
+    bucket_date: str = Field(..., description="Date string (YYYY-MM-DD or YYYY-Www)")
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] | None = Field(
+        default=None, description="Most common action in bucket"
+    )
+    # Constrained to the closed interval [0.0, 1.0].
+    avg_conviction: float = Field(
+        default=0.0, ge=0.0, le=1.0, description="Average conviction"
+    )
+    mention_count: int = Field(
+        default=0, description="Count of mentions in bucket"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class StockTimeline(BaseModel):
+    """Timeline of mentions for a single stock ticker.
+
+    Combines aggregated ``TimelineBucket`` entries with the individual
+    ``StockMention`` records for one ``symbol``. Both lists default to empty.
+
+    NOTE(review): the "newest first" ordering of ``mentions`` is stated in the
+    field description only; the model does not sort or verify it.
+    """
+
+    symbol: str = Field(..., description="Stock ticker")
+    buckets: list[TimelineBucket] = Field(
+        default_factory=list, description="Time-bucketed data"
+    )
+    mentions: list[StockMention] = Field(
+        default_factory=list, description="Chronological mentions (newest first)"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
+
+
+class PipelineHealth(BaseModel):
+    """Health status of the Meet Kevin pipeline.
+
+    Every field has a default, so ``PipelineHealth()`` is valid and reports
+    ``pipeline_status="ok"`` with zeroed counters and a 5.0 USD daily cap.
+
+    NOTE(review): the ``*_today`` counters are named for a calendar day, but
+    ``videos_discovered_today`` is described as "last 24h"; confirm which
+    window the producer actually uses. ``cost_capped`` is an independent
+    flag and is not derived here from the two cost fields.
+    """
+
+    last_poll_at: datetime | None = Field(
+        default=None, description="Timestamp of last RSS poll"
+    )
+    last_poll_age_seconds: int | None = Field(
+        default=None, description="Seconds since last poll"
+    )
+    videos_discovered_today: int = Field(
+        default=0, description="Videos found in last 24h"
+    )
+    videos_captioned_today: int = Field(
+        default=0, description="Videos with captions processed"
+    )
+    videos_analyzed_today: int = Field(
+        default=0, description="Videos analyzed with LLM"
+    )
+    llm_cost_today_usd: float = Field(
+        default=0.0, description="Total LLM cost today"
+    )
+    daily_cost_cap_usd: float = Field(
+        default=5.0, description="Daily cost limit"
+    )
+    cost_capped: bool = Field(
+        default=False, description="True if cost cap hit today"
+    )
+    pipeline_status: Literal["ok", "warning", "error"] = Field(
+        default="ok", description="Overall health status"
+    )
+    status_message: str | None = Field(
+        default=None, description="Optional status details"
+    )
+
+    # Allow building the model from attribute-bearing objects, not only dicts.
+    model_config = {"from_attributes": True}
--- a/tests/test_schemas.py
+++ b/tests/test_schemas.py
@ -584,3 +584,197 @@ class TestTokenResponse:
        )
        restored = TokenResponse.model_validate_json(t.model_dump_json())
        assert restored == t
+
+
+# ---------------------------------------------------------------------------
+# --- Meet Kevin schemas ---
+# ---------------------------------------------------------------------------
+
+
+class TestMeetKevinTickerMention:
+    """Validation tests for the ``MeetKevinTickerMention`` schema.
+
+    Each test imports the schema locally. ``pytest`` and ``ValidationError``
+    are not imported in this class; presumably they come from the top of the
+    test module, which is outside this hunk.
+    """
+
+    def test_valid_ticker_mention(self) -> None:
+        """A fully populated mention is accepted and keeps its values."""
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol="AAPL",
+            action="buy",
+            conviction=0.85,
+            time_horizon="months",
+            rationale_quote="Strong earnings growth expected",
+            video_timestamp_seconds=120,
+        )
+        assert mention.symbol == "AAPL"
+        assert mention.conviction == 0.85
+
+    def test_symbol_auto_uppercases(self) -> None:
+        """A lowercase symbol is stored uppercased."""
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol="tsla",
+            action="hold",
+            conviction=0.5,
+            time_horizon="weeks",
+            rationale_quote="Neutral outlook",
+        )
+        assert mention.symbol == "TSLA"
+
+    def test_conviction_out_of_range_high(self) -> None:
+        """A conviction above 1.0 is rejected."""
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        with pytest.raises(ValidationError):
+            MeetKevinTickerMention(
+                symbol="AAPL",
+                action="buy",
+                conviction=1.5,
+                time_horizon="months",
+                rationale_quote="Too confident",
+            )
+
+    def test_conviction_out_of_range_low(self) -> None:
+        """A conviction below 0.0 is rejected."""
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        with pytest.raises(ValidationError):
+            MeetKevinTickerMention(
+                symbol="AAPL",
+                action="sell",
+                conviction=-0.1,
+                time_horizon="days",
+                rationale_quote="Negative conviction",
+            )
+
+    def test_conviction_edge_cases(self) -> None:
+        """Both bounds of the conviction range (0.0 and 1.0) are inclusive."""
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        # Lower bound: exactly 0.0 must be accepted.
+        m1 = MeetKevinTickerMention(
+            symbol="GOOG",
+            action="avoid",
+            conviction=0.0,
+            time_horizon="unspecified",
+            rationale_quote="No confidence",
+        )
+        assert m1.conviction == 0.0
+
+        # Upper bound: exactly 1.0 must be accepted.
+        m2 = MeetKevinTickerMention(
+            symbol="MSFT",
+            action="buy",
+            conviction=1.0,
+            time_horizon="long_term",
+            rationale_quote="Maximum confidence",
+        )
+        assert m2.conviction == 1.0
+
+    def test_timestamp_optional(self) -> None:
+        """Omitting ``video_timestamp_seconds`` leaves it as ``None``."""
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol="NVDA",
+            action="watch",
+            conviction=0.6,
+            time_horizon="intraday",
+            rationale_quote="Monitoring closely",
+        )
+        assert mention.video_timestamp_seconds is None
+
+
+class TestMeetKevinAnalysis:
+    """Construction and serialization tests for ``MeetKevinAnalysis``.
+
+    Each test imports the schemas locally and builds the analysis with nested
+    ``MeetKevinTickerMention`` instances where tickers are needed.
+    """
+
+    def test_valid_analysis(self) -> None:
+        """An analysis with one ticker keeps its direction and nested mention."""
+        from shared.schemas.meet_kevin import (
+            MeetKevinAnalysis,
+            MeetKevinTickerMention,
+        )
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="bullish",
+            market_outlook_reasoning="Strong macro tailwinds",
+            macro_themes=["inflation_easing", "ai_acceleration"],
+            key_risks=["geopolitical_uncertainty", "rate_volatility"],
+            summary="Overall positive outlook for tech sector",
+            tickers=[
+                MeetKevinTickerMention(
+                    symbol="AAPL",
+                    action="buy",
+                    conviction=0.85,
+                    time_horizon="months",
+                    rationale_quote="Strong earnings expected",
+                )
+            ],
+        )
+        assert analysis.market_outlook_direction == "bullish"
+        assert len(analysis.tickers) == 1
+        assert analysis.tickers[0].symbol == "AAPL"
+
+    def test_multiple_tickers(self) -> None:
+        """Multiple ticker mentions are kept in the order they were supplied."""
+        from shared.schemas.meet_kevin import (
+            MeetKevinAnalysis,
+            MeetKevinTickerMention,
+        )
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="neutral",
+            market_outlook_reasoning="Mixed signals",
+            macro_themes=["earnings_season"],
+            key_risks=["fed_decisions"],
+            summary="Cautious outlook",
+            tickers=[
+                MeetKevinTickerMention(
+                    symbol="TSLA",
+                    action="buy",
+                    conviction=0.7,
+                    time_horizon="weeks",
+                    rationale_quote="Breakout expected",
+                ),
+                MeetKevinTickerMention(
+                    symbol="GOOG",
+                    action="hold",
+                    conviction=0.5,
+                    time_horizon="months",
+                    rationale_quote="Wait for clarity",
+                ),
+            ],
+        )
+        assert len(analysis.tickers) == 2
+        assert analysis.tickers[0].symbol == "TSLA"
+        assert analysis.tickers[1].symbol == "GOOG"
+
+    def test_empty_tickers_list(self) -> None:
+        """An explicitly empty ``tickers`` list is accepted."""
+        from shared.schemas.meet_kevin import MeetKevinAnalysis
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="bearish",
+            market_outlook_reasoning="Recession risk",
+            macro_themes=["inflation"],
+            key_risks=["unemployment"],
+            summary="Negative outlook",
+            tickers=[],
+        )
+        assert len(analysis.tickers) == 0
+
+    def test_json_round_trip(self) -> None:
+        """Dumping to JSON and validating it back yields an equal model."""
+        from shared.schemas.meet_kevin import (
+            MeetKevinAnalysis,
+            MeetKevinTickerMention,
+        )
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="mixed",
+            market_outlook_reasoning="Divergent sector performance",
+            macro_themes=["rate_peak", "ai_growth"],
+            key_risks=["credit_stress"],
+            summary="Selective opportunities",
+            tickers=[
+                MeetKevinTickerMention(
+                    # Six-letter symbol: the longest the field description allows.
+                    symbol="NVIDIA",
+                    action="buy",
+                    conviction=0.95,
+                    time_horizon="long_term",
+                    rationale_quote="AI leader",
+                    video_timestamp_seconds=300,
+                )
+            ],
+        )
+        json_str = analysis.model_dump_json()
+        restored = MeetKevinAnalysis.model_validate_json(json_str)
+        assert restored == analysis
+        assert restored.tickers[0].symbol == "NVIDIA"