diff --git a/shared/schemas/meet_kevin.py b/shared/schemas/meet_kevin.py
new file mode 100644
index 0000000..543148a
--- /dev/null
+++ b/shared/schemas/meet_kevin.py
@@ -0,0 +1,332 @@
+"""Meet Kevin pipeline Pydantic schemas.
+
+Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
+and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
+StockMention, StockTimeline, TimelineBucket, PipelineHealth).
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Literal
+
+from pydantic import BaseModel, Field, field_validator
+
+
+# =============================================================================
+# Enums
+# =============================================================================
+
+# NOTE: the models below spell these values out again as Literal[...] types
+# rather than referencing the enums; keep the two in sync when adding a value.
+
+
+class TickerAction(str, Enum):
+    """Action recommendation for a stock ticker."""
+
+    BUY = "buy"
+    SELL = "sell"
+    HOLD = "hold"
+    WATCH = "watch"
+    AVOID = "avoid"
+
+
+class TimeHorizon(str, Enum):
+    """Time horizon for an investment recommendation."""
+
+    INTRADAY = "intraday"
+    DAYS = "days"
+    WEEKS = "weeks"
+    MONTHS = "months"
+    LONG_TERM = "long_term"
+    UNSPECIFIED = "unspecified"
+
+
+class MarketOutlook(str, Enum):
+    """Overall market sentiment direction."""
+
+    BULLISH = "bullish"
+    NEUTRAL = "neutral"
+    BEARISH = "bearish"
+    MIXED = "mixed"
+
+
+class VideoStatus(str, Enum):
+    """Status of a video in the processing pipeline."""
+
+    DISCOVERED = "discovered"
+    CAPTIONED = "captioned"
+    ANALYZED = "analyzed"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+class TranscriptSource(str, Enum):
+    """Source of transcript captions."""
+
+    CAPTIONS_MANUAL = "captions_manual"
+    CAPTIONS_AUTO = "captions_auto"
+    NONE = "none"
+
+
+# =============================================================================
+# LLM Tool-Input Schemas
+# =============================================================================
+
+
+class MeetKevinTickerMention(BaseModel):
+    """Single stock ticker mention extracted by Claude from a video transcript.
+
+    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.
+    """
+
+    symbol: str = Field(
+        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
+    )
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Recommendation action"
+    )
+    conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
+    )
+    time_horizon: Literal[
+        "intraday", "days", "weeks", "months", "long_term", "unspecified"
+    ] = Field(..., description="Time horizon for the recommendation")
+    rationale_quote: str = Field(
+        ..., description="Short verbatim or paraphrased quote from video"
+    )
+    video_timestamp_seconds: int | None = Field(
+        default=None, description="Timestamp for deep-link target"
+    )
+
+    @field_validator("symbol")
+    @classmethod
+    def uppercase_symbol(cls, v: str) -> str:
+        """Normalize the ticker symbol and enforce the documented format.
+
+        Surrounding whitespace is stripped; what remains must be 1-6 ASCII
+        letters and is returned uppercased, matching the field description.
+
+        Raises:
+            ValueError: If the stripped symbol is empty, longer than six
+                characters, or contains anything other than ASCII letters.
+        """
+        symbol = v.strip()
+        if not (1 <= len(symbol) <= 6 and symbol.isascii() and symbol.isalpha()):
+            raise ValueError("symbol must be 1-6 letters A-Z")
+        return symbol.upper()
+
+    model_config = {"from_attributes": True}
+
+
+class MeetKevinAnalysis(BaseModel):
+    """Complete analysis output from Claude for a single video transcript.
+
+    Used as tool-input for the LLM analyzer and persisted as kevin_analyses.
+    """
+
+    market_outlook_direction: Literal["bullish", "neutral", "bearish", "mixed"] = (
+        Field(..., description="Overall market sentiment direction")
+    )
+    market_outlook_reasoning: str = Field(
+        ..., description="Explanation of market outlook"
+    )
+    macro_themes: list[str] = Field(
+        default_factory=list, description="Macro economic themes discussed"
+    )
+    key_risks: list[str] = Field(
+        default_factory=list, description="Key risks identified"
+    )
+    summary: str = Field(..., description="~200-word summary of analysis")
+    tickers: list[MeetKevinTickerMention] = Field(
+        default_factory=list, description="List of ticker mentions"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+# =============================================================================
+# API Response Schemas
+# =============================================================================
+
+
+class TranscriptSegment(BaseModel):
+    """Single segment from a video transcript with timing."""
+
+    start_seconds: float = Field(..., description="Segment start time in seconds")
+    end_seconds: float = Field(..., description="Segment end time in seconds")
+    text: str = Field(..., description="Segment text content")
+
+    model_config = {"from_attributes": True}
+
+
+class VideoSummary(BaseModel):
+    """Summary view of a video in the feed."""
+
+    id: int = Field(..., description="Database ID")
+    youtube_video_id: str = Field(..., description="YouTube video ID")
+    title: str = Field(..., description="Video title")
+    published_at: datetime = Field(..., description="Publication timestamp")
+    thumbnail_url: str = Field(..., description="Thumbnail image URL")
+    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = (
+        Field(..., description="Processing status")
+    )
+    failure_reason: str | None = Field(
+        default=None, description="Failure reason if status=failed"
+    )
+    ticker_count: int = Field(
+        default=0, description="Number of ticker mentions analyzed"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+class VideoDetail(BaseModel):
+    """Full details of a single video including analysis."""
+
+    id: int = Field(..., description="Database ID")
+    youtube_video_id: str = Field(..., description="YouTube video ID")
+    title: str = Field(..., description="Video title")
+    description: str | None = Field(default=None, description="Video description")
+    published_at: datetime = Field(..., description="Publication timestamp")
+    duration_seconds: int | None = Field(default=None, description="Video duration")
+    thumbnail_url: str = Field(..., description="Thumbnail image URL")
+    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = (
+        Field(..., description="Processing status")
+    )
+    failure_reason: str | None = Field(
+        default=None, description="Failure reason if status=failed"
+    )
+    transcript_source: Literal["captions_manual", "captions_auto", "none"] | None = (
+        Field(default=None, description="Source of captions")
+    )
+    transcript_segments: list[TranscriptSegment] = Field(
+        default_factory=list, description="Transcript segments with timing"
+    )
+    transcript_raw: str | None = Field(
+        default=None, description="Full raw transcript text"
+    )
+    analysis: MeetKevinAnalysis | None = Field(
+        default=None, description="LLM analysis if status=analyzed"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+class StockMention(BaseModel):
+    """Single mention of a stock ticker in a video."""
+
+    video_id: int = Field(..., description="Database ID of video")
+    youtube_video_id: str = Field(..., description="YouTube video ID for linking")
+    published_at: datetime = Field(..., description="Video publication date")
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Recommendation action"
+    )
+    conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in recommendation"
+    )
+    time_horizon: Literal[
+        "intraday", "days", "weeks", "months", "long_term", "unspecified"
+    ] = Field(..., description="Time horizon for recommendation")
+    rationale_quote: str = Field(
+        ..., description="Quote or summary of rationale"
+    )
+    video_timestamp_seconds: int | None = Field(
+        default=None, description="Deep-link timestamp"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+class StockSummary(BaseModel):
+    """Summary of a stock across all mentions."""
+
+    symbol: str = Field(..., description="Stock ticker")
+    mention_count: int = Field(..., description="Total mention count")
+    last_mentioned_at: datetime = Field(
+        ..., description="Timestamp of last mention"
+    )
+    latest_action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
+        ..., description="Most recent recommendation"
+    )
+    avg_conviction: float = Field(
+        ..., ge=0.0, le=1.0, description="Average conviction across mentions"
+    )
+    bullish_count: int = Field(
+        default=0, description="Buy + watch count"
+    )
+    bearish_count: int = Field(
+        default=0, description="Sell + avoid count"
+    )
+    neutral_count: int = Field(
+        default=0, description="Hold count"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+class TimelineBucket(BaseModel):
+    """Single time bucket in a sentiment timeline."""
+
+    bucket_date: str = Field(..., description="Date string (YYYY-MM-DD or YYYY-Www)")
+    action: Literal["buy", "sell", "hold", "watch", "avoid"] | None = Field(
+        default=None, description="Most common action in bucket"
+    )
+    avg_conviction: float = Field(
+        default=0.0, ge=0.0, le=1.0, description="Average conviction"
+    )
+    mention_count: int = Field(
+        default=0, description="Count of mentions in bucket"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+class StockTimeline(BaseModel):
+    """Timeline of mentions for a single stock ticker."""
+
+    symbol: str = Field(..., description="Stock ticker")
+    buckets: list[TimelineBucket] = Field(
+        default_factory=list, description="Time-bucketed data"
+    )
+    mentions: list[StockMention] = Field(
+        default_factory=list, description="Chronological mentions (newest first)"
+    )
+
+    model_config = {"from_attributes": True}
+
+
+class PipelineHealth(BaseModel):
+    """Health status of the Meet Kevin pipeline."""
+
+    last_poll_at: datetime | None = Field(
+        default=None, description="Timestamp of last RSS poll"
+    )
+    last_poll_age_seconds: int | None = Field(
+        default=None, description="Seconds since last poll"
+    )
+    videos_discovered_today: int = Field(
+        default=0, description="Videos found in last 24h"
+    )
+    videos_captioned_today: int = Field(
+        default=0, description="Videos with captions processed"
+    )
+    videos_analyzed_today: int = Field(
+        default=0, description="Videos analyzed with LLM"
+    )
+    llm_cost_today_usd: float = Field(
+        default=0.0, description="Total LLM cost today"
+    )
+    daily_cost_cap_usd: float = Field(
+        default=5.0, description="Daily cost limit"
+    )
+    cost_capped: bool = Field(
+        default=False, description="True if cost cap hit today"
+    )
+    pipeline_status: Literal["ok", "warning", "error"] = Field(
+        default="ok", description="Overall health status"
+    )
+    status_message: str | None = Field(
+        default=None, description="Optional status details"
+    )
+
+    model_config = {"from_attributes": True}
diff --git a/tests/test_schemas.py b/tests/test_schemas.py
index d44d248..d00775a 100644
--- a/tests/test_schemas.py
+++ b/tests/test_schemas.py
@@ -584,3 +584,222 @@ class TestTokenResponse:
         )
         restored = TokenResponse.model_validate_json(t.model_dump_json())
         assert restored == t
+
+
+# ---------------------------------------------------------------------------
+# --- Meet Kevin schemas ---
+# ---------------------------------------------------------------------------
+
+
+class TestMeetKevinTickerMention:
+    def test_valid_ticker_mention(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol="AAPL",
+            action="buy",
+            conviction=0.85,
+            time_horizon="months",
+            rationale_quote="Strong earnings growth expected",
+            video_timestamp_seconds=120,
+        )
+        assert mention.symbol == "AAPL"
+        assert mention.conviction == 0.85
+
+    def test_symbol_auto_uppercases(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol="tsla",
+            action="hold",
+            conviction=0.5,
+            time_horizon="weeks",
+            rationale_quote="Neutral outlook",
+        )
+        assert mention.symbol == "TSLA"
+
+    def test_symbol_strips_whitespace(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol=" nvda ",
+            action="watch",
+            conviction=0.6,
+            time_horizon="days",
+            rationale_quote="Padded symbol",
+        )
+        assert mention.symbol == "NVDA"
+
+    def test_symbol_invalid_format_rejected(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        for bad_symbol in ("", "   ", "TOOLONG", "BRK.B", "$AAPL", "123"):
+            with pytest.raises(ValidationError):
+                MeetKevinTickerMention(
+                    symbol=bad_symbol,
+                    action="buy",
+                    conviction=0.5,
+                    time_horizon="days",
+                    rationale_quote="Malformed symbol",
+                )
+
+    def test_conviction_out_of_range_high(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        with pytest.raises(ValidationError):
+            MeetKevinTickerMention(
+                symbol="AAPL",
+                action="buy",
+                conviction=1.5,
+                time_horizon="months",
+                rationale_quote="Too confident",
+            )
+
+    def test_conviction_out_of_range_low(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        with pytest.raises(ValidationError):
+            MeetKevinTickerMention(
+                symbol="AAPL",
+                action="sell",
+                conviction=-0.1,
+                time_horizon="days",
+                rationale_quote="Negative conviction",
+            )
+
+    def test_conviction_edge_cases(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        # Test 0.0
+        m1 = MeetKevinTickerMention(
+            symbol="GOOG",
+            action="avoid",
+            conviction=0.0,
+            time_horizon="unspecified",
+            rationale_quote="No confidence",
+        )
+        assert m1.conviction == 0.0
+
+        # Test 1.0
+        m2 = MeetKevinTickerMention(
+            symbol="MSFT",
+            action="buy",
+            conviction=1.0,
+            time_horizon="long_term",
+            rationale_quote="Maximum confidence",
+        )
+        assert m2.conviction == 1.0
+
+    def test_timestamp_optional(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinTickerMention
+
+        mention = MeetKevinTickerMention(
+            symbol="NVDA",
+            action="watch",
+            conviction=0.6,
+            time_horizon="intraday",
+            rationale_quote="Monitoring closely",
+        )
+        assert mention.video_timestamp_seconds is None
+
+
+class TestMeetKevinAnalysis:
+    def test_valid_analysis(self) -> None:
+        from shared.schemas.meet_kevin import (
+            MeetKevinAnalysis,
+            MeetKevinTickerMention,
+        )
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="bullish",
+            market_outlook_reasoning="Strong macro tailwinds",
+            macro_themes=["inflation_easing", "ai_acceleration"],
+            key_risks=["geopolitical_uncertainty", "rate_volatility"],
+            summary="Overall positive outlook for tech sector",
+            tickers=[
+                MeetKevinTickerMention(
+                    symbol="AAPL",
+                    action="buy",
+                    conviction=0.85,
+                    time_horizon="months",
+                    rationale_quote="Strong earnings expected",
+                )
+            ],
+        )
+        assert analysis.market_outlook_direction == "bullish"
+        assert len(analysis.tickers) == 1
+        assert analysis.tickers[0].symbol == "AAPL"
+
+    def test_multiple_tickers(self) -> None:
+        from shared.schemas.meet_kevin import (
+            MeetKevinAnalysis,
+            MeetKevinTickerMention,
+        )
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="neutral",
+            market_outlook_reasoning="Mixed signals",
+            macro_themes=["earnings_season"],
+            key_risks=["fed_decisions"],
+            summary="Cautious outlook",
+            tickers=[
+                MeetKevinTickerMention(
+                    symbol="TSLA",
+                    action="buy",
+                    conviction=0.7,
+                    time_horizon="weeks",
+                    rationale_quote="Breakout expected",
+                ),
+                MeetKevinTickerMention(
+                    symbol="GOOG",
+                    action="hold",
+                    conviction=0.5,
+                    time_horizon="months",
+                    rationale_quote="Wait for clarity",
+                ),
+            ],
+        )
+        assert len(analysis.tickers) == 2
+        assert analysis.tickers[0].symbol == "TSLA"
+        assert analysis.tickers[1].symbol == "GOOG"
+
+    def test_empty_tickers_list(self) -> None:
+        from shared.schemas.meet_kevin import MeetKevinAnalysis
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="bearish",
+            market_outlook_reasoning="Recession risk",
+            macro_themes=["inflation"],
+            key_risks=["unemployment"],
+            summary="Negative outlook",
+            tickers=[],
+        )
+        assert len(analysis.tickers) == 0
+
+    def test_json_round_trip(self) -> None:
+        from shared.schemas.meet_kevin import (
+            MeetKevinAnalysis,
+            MeetKevinTickerMention,
+        )
+
+        analysis = MeetKevinAnalysis(
+            market_outlook_direction="mixed",
+            market_outlook_reasoning="Divergent sector performance",
+            macro_themes=["rate_peak", "ai_growth"],
+            key_risks=["credit_stress"],
+            summary="Selective opportunities",
+            tickers=[
+                MeetKevinTickerMention(
+                    symbol="NVIDIA",
+                    action="buy",
+                    conviction=0.95,
+                    time_horizon="long_term",
+                    rationale_quote="AI leader",
+                    video_timestamp_seconds=300,
+                )
+            ],
+        )
+        json_str = analysis.model_dump_json()
+        restored = MeetKevinAnalysis.model_validate_json(json_str)
+        assert restored == analysis
+        assert restored.tickers[0].symbol == "NVIDIA"