feat: add Meet Kevin pydantic schemas (analysis + API shapes)

This commit is contained in:
Viktor Barzin 2026-05-21 19:06:04 +00:00
parent 8f616e6487
commit 75534de71b
2 changed files with 512 additions and 0 deletions

View file

@ -0,0 +1,318 @@
"""Meet Kevin pipeline Pydantic schemas.
Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
StockMention, StockTimeline, TimelineBucket, PipelineHealth).
"""
from datetime import datetime
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field, field_validator
# =============================================================================
# Enums
# =============================================================================
class TickerAction(str, Enum):
    """Recommended action for a stock ticker.

    The ``str`` mix-in makes each member compare equal to its plain
    string value.
    """

    # Entering or exiting a position.
    BUY = "buy"
    SELL = "sell"

    # No trade implied.
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
class TimeHorizon(str, Enum):
    """Time horizon attached to an investment recommendation.

    Members are listed from shortest to longest horizon, followed by the
    catch-all ``UNSPECIFIED``.
    """

    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"

    # Fallback value for recommendations without a stated horizon.
    UNSPECIFIED = "unspecified"
class MarketOutlook(str, Enum):
    """Direction of overall market sentiment."""

    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
class VideoStatus(str, Enum):
    """Processing-pipeline status of a video."""

    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"

    # Statuses for videos that did not reach ANALYZED.
    FAILED = "failed"
    SKIPPED = "skipped"
class TranscriptSource(str, Enum):
    """Where a transcript's captions came from."""

    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"

    # The literal string "none", distinct from Python's ``None``.
    NONE = "none"
# =============================================================================
# LLM Tool-Input Schemas
# =============================================================================
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.

    Validation raises ``pydantic.ValidationError`` when ``symbol`` is not
    1-6 ASCII letters (after trimming surrounding whitespace), when
    ``conviction`` falls outside 0.0-1.0, or when ``action`` /
    ``time_horizon`` is not one of the listed literals.
    """

    symbol: str = Field(
        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
    )
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ..., description="Recommendation action"
    )
    conviction: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
    )
    time_horizon: Literal[
        "intraday", "days", "weeks", "months", "long_term", "unspecified"
    ] = Field(..., description="Time horizon for the recommendation")
    rationale_quote: str = Field(
        ..., description="Short verbatim or paraphrased quote from video"
    )
    video_timestamp_seconds: int | None = Field(
        default=None, description="Timestamp for deep-link target"
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Normalize the ticker symbol and enforce its documented format.

        Args:
            v: Symbol as supplied by the caller, in any letter case.

        Returns:
            The symbol with surrounding whitespace removed, uppercased.

        Raises:
            ValueError: If the trimmed symbol is empty, longer than six
                characters, or contains anything other than ASCII letters.
                Pydantic reports this as a ``ValidationError``.
        """
        candidate = v.strip()
        # Check before uppercasing: ``str.upper`` can expand non-ASCII
        # characters (e.g. "ß" -> "SS") into text that would pass the test.
        # ``isalpha`` is False for "", so the empty symbol is rejected too.
        if not (candidate.isascii() and candidate.isalpha() and len(candidate) <= 6):
            raise ValueError("symbol must be 1-6 letters A-Z")
        return candidate.upper()

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}
class MeetKevinAnalysis(BaseModel):
    """Full analysis Claude produces for one video transcript.

    Serves as the LLM analyzer's tool-input schema and is persisted as
    kevin_analyses.
    """

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # --- Market outlook (both required) ---
    market_outlook_direction: Literal[
        "bullish", "neutral", "bearish", "mixed"
    ] = Field(description="Overall market sentiment direction")
    market_outlook_reasoning: str = Field(description="Explanation of market outlook")

    # --- Optional lists; each defaults to a fresh empty list ---
    macro_themes: list[str] = Field(
        description="Macro economic themes discussed",
        default_factory=list,
    )
    key_risks: list[str] = Field(
        description="Key risks identified",
        default_factory=list,
    )

    summary: str = Field(description="~200-word summary of analysis")

    tickers: list[MeetKevinTickerMention] = Field(
        description="List of ticker mentions",
        default_factory=list,
    )
# =============================================================================
# API Response Schemas
# =============================================================================
class TranscriptSegment(BaseModel):
    """One timed slice of a video transcript."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # All three fields are required.
    start_seconds: float = Field(description="Segment start time in seconds")
    end_seconds: float = Field(description="Segment end time in seconds")
    text: str = Field(description="Segment text content")
class VideoSummary(BaseModel):
    """Compact representation of a video as shown in the feed."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # --- Required identity and metadata ---
    id: int = Field(description="Database ID")
    youtube_video_id: str = Field(description="YouTube video ID")
    title: str = Field(description="Video title")
    published_at: datetime = Field(description="Publication timestamp")
    thumbnail_url: str = Field(description="Thumbnail image URL")

    # --- Pipeline state ---
    status: Literal[
        "discovered", "captioned", "analyzed", "failed", "skipped"
    ] = Field(description="Processing status")
    failure_reason: str | None = Field(
        description="Failure reason if status=failed",
        default=None,
    )
    ticker_count: int = Field(
        description="Number of ticker mentions analyzed",
        default=0,
    )
class VideoDetail(BaseModel):
    """Complete view of one video: metadata, transcript and analysis."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # --- Video metadata ---
    id: int = Field(description="Database ID")
    youtube_video_id: str = Field(description="YouTube video ID")
    title: str = Field(description="Video title")
    description: str | None = Field(description="Video description", default=None)
    published_at: datetime = Field(description="Publication timestamp")
    duration_seconds: int | None = Field(description="Video duration", default=None)
    thumbnail_url: str = Field(description="Thumbnail image URL")

    # --- Pipeline state ---
    status: Literal[
        "discovered", "captioned", "analyzed", "failed", "skipped"
    ] = Field(description="Processing status")
    failure_reason: str | None = Field(
        description="Failure reason if status=failed",
        default=None,
    )

    # --- Transcript (all optional) ---
    transcript_source: (
        Literal["captions_manual", "captions_auto", "none"] | None
    ) = Field(description="Source of captions", default=None)
    transcript_segments: list[TranscriptSegment] = Field(
        description="Transcript segments with timing",
        default_factory=list,
    )
    transcript_raw: str | None = Field(
        description="Full raw transcript text",
        default=None,
    )

    # --- LLM output ---
    analysis: MeetKevinAnalysis | None = Field(
        description="LLM analysis if status=analyzed",
        default=None,
    )
class StockMention(BaseModel):
    """One occurrence of a ticker being mentioned in a video."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # --- Which video the mention belongs to ---
    video_id: int = Field(description="Database ID of video")
    youtube_video_id: str = Field(description="YouTube video ID for linking")
    published_at: datetime = Field(description="Video publication date")

    # --- The recommendation itself ---
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        description="Recommendation action"
    )
    conviction: float = Field(
        description="Confidence in recommendation",
        ge=0.0,
        le=1.0,
    )
    time_horizon: Literal[
        "intraday", "days", "weeks", "months", "long_term", "unspecified"
    ] = Field(description="Time horizon for recommendation")
    rationale_quote: str = Field(description="Quote or summary of rationale")
    video_timestamp_seconds: int | None = Field(
        description="Deep-link timestamp",
        default=None,
    )
class StockSummary(BaseModel):
    """Aggregate view of one ticker across every mention of it."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # --- Required aggregates ---
    symbol: str = Field(description="Stock ticker")
    mention_count: int = Field(description="Total mention count")
    last_mentioned_at: datetime = Field(description="Timestamp of last mention")
    latest_action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        description="Most recent recommendation"
    )
    avg_conviction: float = Field(
        description="Average conviction across mentions",
        ge=0.0,
        le=1.0,
    )

    # --- Sentiment tallies; each defaults to zero ---
    bullish_count: int = Field(description="Buy + watch count", default=0)
    bearish_count: int = Field(description="Sell + avoid count", default=0)
    neutral_count: int = Field(description="Hold count", default=0)
class TimelineBucket(BaseModel):
    """One time bucket of a sentiment timeline."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # The only required field; kept as a string rather than a date type.
    bucket_date: str = Field(description="Date string (YYYY-MM-DD or YYYY-Www)")

    # Optional aggregates, each with a default.
    action: Literal["buy", "sell", "hold", "watch", "avoid"] | None = Field(
        description="Most common action in bucket",
        default=None,
    )
    avg_conviction: float = Field(
        description="Average conviction",
        default=0.0,
        ge=0.0,
        le=1.0,
    )
    mention_count: int = Field(description="Count of mentions in bucket", default=0)
class StockTimeline(BaseModel):
    """Mention history of a single ticker: bucketed data plus raw mentions."""

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    symbol: str = Field(description="Stock ticker")

    # Both lists default to a fresh empty list.
    buckets: list[TimelineBucket] = Field(
        description="Time-bucketed data",
        default_factory=list,
    )
    mentions: list[StockMention] = Field(
        description="Chronological mentions (newest first)",
        default_factory=list,
    )
class PipelineHealth(BaseModel):
    """Snapshot of the Meet Kevin pipeline's health.

    Every field carries a default, so the model can be built with no
    arguments.
    """

    # Allow validation from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # --- Polling ---
    last_poll_at: datetime | None = Field(
        description="Timestamp of last RSS poll",
        default=None,
    )
    last_poll_age_seconds: int | None = Field(
        description="Seconds since last poll",
        default=None,
    )

    # --- Throughput counters ---
    videos_discovered_today: int = Field(
        description="Videos found in last 24h",
        default=0,
    )
    videos_captioned_today: int = Field(
        description="Videos with captions processed",
        default=0,
    )
    videos_analyzed_today: int = Field(
        description="Videos analyzed with LLM",
        default=0,
    )

    # --- LLM spend ---
    llm_cost_today_usd: float = Field(description="Total LLM cost today", default=0.0)
    daily_cost_cap_usd: float = Field(description="Daily cost limit", default=5.0)
    cost_capped: bool = Field(
        description="True if cost cap hit today",
        default=False,
    )

    # --- Overall verdict ---
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        description="Overall health status",
        default="ok",
    )
    status_message: str | None = Field(
        description="Optional status details",
        default=None,
    )

View file

@ -584,3 +584,197 @@ class TestTokenResponse:
)
restored = TokenResponse.model_validate_json(t.model_dump_json())
assert restored == t
# ---------------------------------------------------------------------------
# --- Meet Kevin schemas ---
# ---------------------------------------------------------------------------
class TestMeetKevinTickerMention:
    """Checks on the MeetKevinTickerMention schema's validation rules."""

    def test_valid_ticker_mention(self) -> None:
        """A fully specified mention keeps the values it was given."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        ticker = MeetKevinTickerMention(
            video_timestamp_seconds=120,
            rationale_quote="Strong earnings growth expected",
            time_horizon="months",
            conviction=0.85,
            action="buy",
            symbol="AAPL",
        )

        assert ticker.symbol == "AAPL"
        assert ticker.conviction == 0.85

    def test_symbol_auto_uppercases(self) -> None:
        """A lowercase symbol comes back uppercased."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        ticker = MeetKevinTickerMention(
            rationale_quote="Neutral outlook",
            time_horizon="weeks",
            conviction=0.5,
            action="hold",
            symbol="tsla",
        )

        assert ticker.symbol == "TSLA"

    def test_conviction_out_of_range_high(self) -> None:
        """Conviction above 1.0 is rejected."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        with pytest.raises(ValidationError):
            MeetKevinTickerMention(
                rationale_quote="Too confident",
                time_horizon="months",
                conviction=1.5,
                action="buy",
                symbol="AAPL",
            )

    def test_conviction_out_of_range_low(self) -> None:
        """Conviction below 0.0 is rejected."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        with pytest.raises(ValidationError):
            MeetKevinTickerMention(
                rationale_quote="Negative conviction",
                time_horizon="days",
                conviction=-0.1,
                action="sell",
                symbol="AAPL",
            )

    def test_conviction_edge_cases(self) -> None:
        """Both inclusive bounds, 0.0 and 1.0, are accepted."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        # Lower bound.
        at_floor = MeetKevinTickerMention(
            rationale_quote="No confidence",
            time_horizon="unspecified",
            conviction=0.0,
            action="avoid",
            symbol="GOOG",
        )
        assert at_floor.conviction == 0.0

        # Upper bound.
        at_ceiling = MeetKevinTickerMention(
            rationale_quote="Maximum confidence",
            time_horizon="long_term",
            conviction=1.0,
            action="buy",
            symbol="MSFT",
        )
        assert at_ceiling.conviction == 1.0

    def test_timestamp_optional(self) -> None:
        """Omitting the timestamp leaves it as None."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        ticker = MeetKevinTickerMention(
            rationale_quote="Monitoring closely",
            time_horizon="intraday",
            conviction=0.6,
            action="watch",
            symbol="NVDA",
        )

        assert ticker.video_timestamp_seconds is None
class TestMeetKevinAnalysis:
    """Checks on the MeetKevinAnalysis schema and its nested tickers."""

    def test_valid_analysis(self) -> None:
        """An analysis with one ticker exposes its outlook and that ticker."""
        from shared.schemas.meet_kevin import (
            MeetKevinAnalysis,
            MeetKevinTickerMention,
        )

        apple = MeetKevinTickerMention(
            symbol="AAPL",
            action="buy",
            conviction=0.85,
            time_horizon="months",
            rationale_quote="Strong earnings expected",
        )
        result = MeetKevinAnalysis(
            market_outlook_direction="bullish",
            market_outlook_reasoning="Strong macro tailwinds",
            macro_themes=["inflation_easing", "ai_acceleration"],
            key_risks=["geopolitical_uncertainty", "rate_volatility"],
            summary="Overall positive outlook for tech sector",
            tickers=[apple],
        )

        assert result.market_outlook_direction == "bullish"
        assert len(result.tickers) == 1
        assert result.tickers[0].symbol == "AAPL"

    def test_multiple_tickers(self) -> None:
        """Several tickers are kept in the order they were supplied."""
        from shared.schemas.meet_kevin import (
            MeetKevinAnalysis,
            MeetKevinTickerMention,
        )

        tesla = MeetKevinTickerMention(
            symbol="TSLA",
            action="buy",
            conviction=0.7,
            time_horizon="weeks",
            rationale_quote="Breakout expected",
        )
        google = MeetKevinTickerMention(
            symbol="GOOG",
            action="hold",
            conviction=0.5,
            time_horizon="months",
            rationale_quote="Wait for clarity",
        )
        result = MeetKevinAnalysis(
            market_outlook_direction="neutral",
            market_outlook_reasoning="Mixed signals",
            macro_themes=["earnings_season"],
            key_risks=["fed_decisions"],
            summary="Cautious outlook",
            tickers=[tesla, google],
        )

        assert len(result.tickers) == 2
        assert result.tickers[0].symbol == "TSLA"
        assert result.tickers[1].symbol == "GOOG"

    def test_empty_tickers_list(self) -> None:
        """An explicitly empty ticker list is accepted."""
        from shared.schemas.meet_kevin import MeetKevinAnalysis

        result = MeetKevinAnalysis(
            market_outlook_direction="bearish",
            market_outlook_reasoning="Recession risk",
            macro_themes=["inflation"],
            key_risks=["unemployment"],
            summary="Negative outlook",
            tickers=[],
        )

        assert len(result.tickers) == 0

    def test_json_round_trip(self) -> None:
        """Dumping to JSON and validating it back yields an equal model."""
        from shared.schemas.meet_kevin import (
            MeetKevinAnalysis,
            MeetKevinTickerMention,
        )

        only_ticker = MeetKevinTickerMention(
            symbol="NVIDIA",
            action="buy",
            conviction=0.95,
            time_horizon="long_term",
            rationale_quote="AI leader",
            video_timestamp_seconds=300,
        )
        original = MeetKevinAnalysis(
            market_outlook_direction="mixed",
            market_outlook_reasoning="Divergent sector performance",
            macro_themes=["rate_peak", "ai_growth"],
            key_risks=["credit_stress"],
            summary="Selective opportunities",
            tickers=[only_ticker],
        )

        payload = original.model_dump_json()
        round_tripped = MeetKevinAnalysis.model_validate_json(payload)

        assert round_tripped == original
        assert round_tripped.tickers[0].symbol == "NVIDIA"