feat: add Meet Kevin pydantic schemas (analysis + API shapes)
This commit is contained in:
parent
8f616e6487
commit
75534de71b
2 changed files with 512 additions and 0 deletions
318
shared/schemas/meet_kevin.py
Normal file
318
shared/schemas/meet_kevin.py
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
"""Meet Kevin pipeline Pydantic schemas.
|
||||
|
||||
Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
|
||||
and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
|
||||
StockMention, StockTimeline, TimelineBucket, PipelineHealth).
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Enums
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TickerAction(str, Enum):
    """Recommendation attached to a stock ticker.

    The ``str`` mix-in makes every member compare equal to its lowercase
    string value.
    """

    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
|
||||
|
||||
|
||||
class TimeHorizon(str, Enum):
    """How far out an investment recommendation is meant to apply.

    The ``str`` mix-in makes every member compare equal to its lowercase
    string value.
    """

    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"
    UNSPECIFIED = "unspecified"
|
||||
|
||||
|
||||
class MarketOutlook(str, Enum):
    """Direction of the overall market sentiment.

    The ``str`` mix-in makes every member compare equal to its lowercase
    string value.
    """

    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
|
||||
|
||||
|
||||
class VideoStatus(str, Enum):
    """Where a video currently sits in the processing pipeline.

    The ``str`` mix-in makes every member compare equal to its lowercase
    string value.
    """

    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"
    FAILED = "failed"
    SKIPPED = "skipped"
|
||||
|
||||
|
||||
class TranscriptSource(str, Enum):
    """Origin of the captions backing a transcript.

    The ``str`` mix-in makes every member compare equal to its lowercase
    string value.
    """

    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"
    NONE = "none"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LLM Tool-Input Schemas
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.

    The ``symbol`` value is normalized (surrounding whitespace stripped, then
    upper-cased) and must end up as 1-6 ASCII letters, matching the format the
    field description advertises; anything else fails validation.
    """

    symbol: str = Field(
        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
    )
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ..., description="Recommendation action"
    )
    conviction: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
    )
    time_horizon: Literal[
        "intraday", "days", "weeks", "months", "long_term", "unspecified"
    ] = Field(..., description="Time horizon for the recommendation")
    rationale_quote: str = Field(
        ..., description="Short verbatim or paraphrased quote from video"
    )
    video_timestamp_seconds: int | None = Field(
        default=None, description="Timestamp for deep-link target"
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Normalize the ticker symbol and enforce the documented format.

        Args:
            v: Raw symbol string as supplied by the caller.

        Returns:
            The symbol with surrounding whitespace removed and upper-cased.

        Raises:
            ValueError: If the normalized symbol is not 1-6 ASCII letters.
                Pydantic reports this as a ``ValidationError``.
        """
        symbol = v.strip().upper()
        # A length/pattern constraint on Field would run before this validator
        # and reject lowercase input, so the format check lives here instead.
        if not (1 <= len(symbol) <= 6 and symbol.isascii() and symbol.isalpha()):
            raise ValueError("symbol must be 1-6 letters A-Z")
        return symbol

    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class MeetKevinAnalysis(BaseModel):
    """Claude's full analysis of one video transcript.

    Serves as the tool-input schema for the LLM analyzer and is persisted as
    kevin_analyses.
    """

    model_config = {"from_attributes": True}

    # Required narrative fields.
    market_outlook_direction: Literal["bullish", "neutral", "bearish", "mixed"] = Field(
        ...,
        description="Overall market sentiment direction",
    )
    market_outlook_reasoning: str = Field(
        ...,
        description="Explanation of market outlook",
    )

    # List fields default to a fresh empty list per instance.
    macro_themes: list[str] = Field(
        description="Macro economic themes discussed",
        default_factory=list,
    )
    key_risks: list[str] = Field(
        description="Key risks identified",
        default_factory=list,
    )

    summary: str = Field(
        ...,
        description="~200-word summary of analysis",
    )
    tickers: list[MeetKevinTickerMention] = Field(
        description="List of ticker mentions",
        default_factory=list,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Response Schemas
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TranscriptSegment(BaseModel):
    """One timed slice of a video transcript."""

    model_config = {"from_attributes": True}

    start_seconds: float = Field(
        ...,
        description="Segment start time in seconds",
    )
    end_seconds: float = Field(
        ...,
        description="Segment end time in seconds",
    )
    text: str = Field(
        ...,
        description="Segment text content",
    )
|
||||
|
||||
|
||||
class VideoSummary(BaseModel):
    """Compact per-video record used for the feed listing."""

    model_config = {"from_attributes": True}

    # Identity and display metadata.
    id: int = Field(
        ...,
        description="Database ID",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID",
    )
    title: str = Field(
        ...,
        description="Video title",
    )
    published_at: datetime = Field(
        ...,
        description="Publication timestamp",
    )
    thumbnail_url: str = Field(
        ...,
        description="Thumbnail image URL",
    )

    # Pipeline state.
    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = Field(
        ...,
        description="Processing status",
    )
    failure_reason: str | None = Field(
        None,
        description="Failure reason if status=failed",
    )
    ticker_count: int = Field(
        0,
        description="Number of ticker mentions analyzed",
    )
|
||||
|
||||
|
||||
class VideoDetail(BaseModel):
    """Complete record for one video, including transcript and analysis."""

    model_config = {"from_attributes": True}

    # Identity and display metadata.
    id: int = Field(
        ...,
        description="Database ID",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID",
    )
    title: str = Field(
        ...,
        description="Video title",
    )
    description: str | None = Field(
        None,
        description="Video description",
    )
    published_at: datetime = Field(
        ...,
        description="Publication timestamp",
    )
    duration_seconds: int | None = Field(
        None,
        description="Video duration",
    )
    thumbnail_url: str = Field(
        ...,
        description="Thumbnail image URL",
    )

    # Pipeline state.
    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = Field(
        ...,
        description="Processing status",
    )
    failure_reason: str | None = Field(
        None,
        description="Failure reason if status=failed",
    )

    # Transcript data.
    transcript_source: Literal["captions_manual", "captions_auto", "none"] | None = Field(
        None,
        description="Source of captions",
    )
    transcript_segments: list[TranscriptSegment] = Field(
        description="Transcript segments with timing",
        default_factory=list,
    )
    transcript_raw: str | None = Field(
        None,
        description="Full raw transcript text",
    )

    # LLM output.
    analysis: MeetKevinAnalysis | None = Field(
        None,
        description="LLM analysis if status=analyzed",
    )
|
||||
|
||||
|
||||
class StockMention(BaseModel):
    """One occurrence of a ticker within a specific video."""

    model_config = {"from_attributes": True}

    # Video the mention belongs to.
    video_id: int = Field(
        ...,
        description="Database ID of video",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID for linking",
    )
    published_at: datetime = Field(
        ...,
        description="Video publication date",
    )

    # Recommendation details.
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ...,
        description="Recommendation action",
    )
    conviction: float = Field(
        ...,
        description="Confidence in recommendation",
        ge=0.0,
        le=1.0,
    )
    time_horizon: Literal[
        "intraday",
        "days",
        "weeks",
        "months",
        "long_term",
        "unspecified",
    ] = Field(
        ...,
        description="Time horizon for recommendation",
    )
    rationale_quote: str = Field(
        ...,
        description="Quote or summary of rationale",
    )
    video_timestamp_seconds: int | None = Field(
        None,
        description="Deep-link timestamp",
    )
|
||||
|
||||
|
||||
class StockSummary(BaseModel):
    """Aggregate view of one ticker across all of its mentions."""

    model_config = {"from_attributes": True}

    symbol: str = Field(
        ...,
        description="Stock ticker",
    )
    mention_count: int = Field(
        ...,
        description="Total mention count",
    )
    last_mentioned_at: datetime = Field(
        ...,
        description="Timestamp of last mention",
    )
    latest_action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ...,
        description="Most recent recommendation",
    )
    avg_conviction: float = Field(
        ...,
        description="Average conviction across mentions",
        ge=0.0,
        le=1.0,
    )

    # Per-sentiment tallies; each defaults to zero.
    bullish_count: int = Field(
        0,
        description="Buy + watch count",
    )
    bearish_count: int = Field(
        0,
        description="Sell + avoid count",
    )
    neutral_count: int = Field(
        0,
        description="Hold count",
    )
|
||||
|
||||
|
||||
class TimelineBucket(BaseModel):
    """One time bucket of a sentiment timeline."""

    model_config = {"from_attributes": True}

    bucket_date: str = Field(
        ...,
        description="Date string (YYYY-MM-DD or YYYY-Www)",
    )
    action: Literal["buy", "sell", "hold", "watch", "avoid"] | None = Field(
        None,
        description="Most common action in bucket",
    )
    avg_conviction: float = Field(
        0.0,
        description="Average conviction",
        ge=0.0,
        le=1.0,
    )
    mention_count: int = Field(
        0,
        description="Count of mentions in bucket",
    )
|
||||
|
||||
|
||||
class StockTimeline(BaseModel):
    """Mention history for one ticker: bucketed data plus individual mentions."""

    model_config = {"from_attributes": True}

    symbol: str = Field(
        ...,
        description="Stock ticker",
    )

    # Both collections default to a fresh empty list per instance.
    buckets: list[TimelineBucket] = Field(
        description="Time-bucketed data",
        default_factory=list,
    )
    mentions: list[StockMention] = Field(
        description="Chronological mentions (newest first)",
        default_factory=list,
    )
|
||||
|
||||
|
||||
class PipelineHealth(BaseModel):
    """Health snapshot of the Meet Kevin pipeline.

    Every field has a default, so the model can be built with no arguments.
    """

    model_config = {"from_attributes": True}

    # Polling.
    last_poll_at: datetime | None = Field(
        None,
        description="Timestamp of last RSS poll",
    )
    last_poll_age_seconds: int | None = Field(
        None,
        description="Seconds since last poll",
    )

    # Throughput counters.
    videos_discovered_today: int = Field(
        0,
        description="Videos found in last 24h",
    )
    videos_captioned_today: int = Field(
        0,
        description="Videos with captions processed",
    )
    videos_analyzed_today: int = Field(
        0,
        description="Videos analyzed with LLM",
    )

    # LLM spend.
    llm_cost_today_usd: float = Field(
        0.0,
        description="Total LLM cost today",
    )
    daily_cost_cap_usd: float = Field(
        5.0,
        description="Daily cost limit",
    )
    cost_capped: bool = Field(
        False,
        description="True if cost cap hit today",
    )

    # Overall verdict.
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        "ok",
        description="Overall health status",
    )
    status_message: str | None = Field(
        None,
        description="Optional status details",
    )
|
||||
|
|
@ -584,3 +584,197 @@ class TestTokenResponse:
|
|||
)
|
||||
restored = TokenResponse.model_validate_json(t.model_dump_json())
|
||||
assert restored == t
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# --- Meet Kevin schemas ---
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMeetKevinTickerMention:
    """Validation behaviour of the ``MeetKevinTickerMention`` schema."""

    def test_valid_ticker_mention(self) -> None:
        """A fully populated mention keeps the values it was given."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        payload = {
            "symbol": "AAPL",
            "action": "buy",
            "conviction": 0.85,
            "time_horizon": "months",
            "rationale_quote": "Strong earnings growth expected",
            "video_timestamp_seconds": 120,
        }
        mention = MeetKevinTickerMention(**payload)

        assert mention.symbol == "AAPL"
        assert mention.conviction == 0.85

    def test_symbol_auto_uppercases(self) -> None:
        """A lowercase symbol comes back upper-cased."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        payload = {
            "symbol": "tsla",
            "action": "hold",
            "conviction": 0.5,
            "time_horizon": "weeks",
            "rationale_quote": "Neutral outlook",
        }
        mention = MeetKevinTickerMention(**payload)

        assert mention.symbol == "TSLA"

    def test_conviction_out_of_range_high(self) -> None:
        """Conviction above 1.0 is rejected."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        payload = {
            "symbol": "AAPL",
            "action": "buy",
            "conviction": 1.5,
            "time_horizon": "months",
            "rationale_quote": "Too confident",
        }
        with pytest.raises(ValidationError):
            MeetKevinTickerMention(**payload)

    def test_conviction_out_of_range_low(self) -> None:
        """Conviction below 0.0 is rejected."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        payload = {
            "symbol": "AAPL",
            "action": "sell",
            "conviction": -0.1,
            "time_horizon": "days",
            "rationale_quote": "Negative conviction",
        }
        with pytest.raises(ValidationError):
            MeetKevinTickerMention(**payload)

    def test_conviction_edge_cases(self) -> None:
        """Both inclusive bounds, 0.0 and 1.0, are accepted."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        # (symbol, action, conviction, time_horizon, rationale_quote)
        boundary_cases = [
            ("GOOG", "avoid", 0.0, "unspecified", "No confidence"),
            ("MSFT", "buy", 1.0, "long_term", "Maximum confidence"),
        ]
        for symbol, action, conviction, horizon, quote in boundary_cases:
            mention = MeetKevinTickerMention(
                symbol=symbol,
                action=action,
                conviction=conviction,
                time_horizon=horizon,
                rationale_quote=quote,
            )
            assert mention.conviction == conviction

    def test_timestamp_optional(self) -> None:
        """Omitting the timestamp leaves it as ``None``."""
        from shared.schemas.meet_kevin import MeetKevinTickerMention

        payload = {
            "symbol": "NVDA",
            "action": "watch",
            "conviction": 0.6,
            "time_horizon": "intraday",
            "rationale_quote": "Monitoring closely",
        }
        mention = MeetKevinTickerMention(**payload)

        assert mention.video_timestamp_seconds is None
|
||||
|
||||
|
||||
class TestMeetKevinAnalysis:
    """Construction and serialization of the ``MeetKevinAnalysis`` schema."""

    def test_valid_analysis(self) -> None:
        """An analysis with one ticker exposes the values it was given."""
        from shared.schemas.meet_kevin import (
            MeetKevinAnalysis,
            MeetKevinTickerMention,
        )

        apple = MeetKevinTickerMention(
            symbol="AAPL",
            action="buy",
            conviction=0.85,
            time_horizon="months",
            rationale_quote="Strong earnings expected",
        )
        analysis = MeetKevinAnalysis(
            market_outlook_direction="bullish",
            market_outlook_reasoning="Strong macro tailwinds",
            macro_themes=["inflation_easing", "ai_acceleration"],
            key_risks=["geopolitical_uncertainty", "rate_volatility"],
            summary="Overall positive outlook for tech sector",
            tickers=[apple],
        )

        assert analysis.market_outlook_direction == "bullish"
        assert len(analysis.tickers) == 1
        assert analysis.tickers[0].symbol == "AAPL"

    def test_multiple_tickers(self) -> None:
        """Ticker order is preserved when several mentions are supplied."""
        from shared.schemas.meet_kevin import (
            MeetKevinAnalysis,
            MeetKevinTickerMention,
        )

        tesla = MeetKevinTickerMention(
            symbol="TSLA",
            action="buy",
            conviction=0.7,
            time_horizon="weeks",
            rationale_quote="Breakout expected",
        )
        google = MeetKevinTickerMention(
            symbol="GOOG",
            action="hold",
            conviction=0.5,
            time_horizon="months",
            rationale_quote="Wait for clarity",
        )
        analysis = MeetKevinAnalysis(
            market_outlook_direction="neutral",
            market_outlook_reasoning="Mixed signals",
            macro_themes=["earnings_season"],
            key_risks=["fed_decisions"],
            summary="Cautious outlook",
            tickers=[tesla, google],
        )

        assert len(analysis.tickers) == 2
        assert analysis.tickers[0].symbol == "TSLA"
        assert analysis.tickers[1].symbol == "GOOG"

    def test_empty_tickers_list(self) -> None:
        """An explicit empty ticker list is accepted."""
        from shared.schemas.meet_kevin import MeetKevinAnalysis

        payload = {
            "market_outlook_direction": "bearish",
            "market_outlook_reasoning": "Recession risk",
            "macro_themes": ["inflation"],
            "key_risks": ["unemployment"],
            "summary": "Negative outlook",
            "tickers": [],
        }
        analysis = MeetKevinAnalysis(**payload)

        assert len(analysis.tickers) == 0

    def test_json_round_trip(self) -> None:
        """Dumping to JSON and validating it back yields an equal model."""
        from shared.schemas.meet_kevin import (
            MeetKevinAnalysis,
            MeetKevinTickerMention,
        )

        nvidia = MeetKevinTickerMention(
            symbol="NVIDIA",
            action="buy",
            conviction=0.95,
            time_horizon="long_term",
            rationale_quote="AI leader",
            video_timestamp_seconds=300,
        )
        analysis = MeetKevinAnalysis(
            market_outlook_direction="mixed",
            market_outlook_reasoning="Divergent sector performance",
            macro_themes=["rate_peak", "ai_growth"],
            key_risks=["credit_stress"],
            summary="Selective opportunities",
            tickers=[nvidia],
        )

        restored = MeetKevinAnalysis.model_validate_json(analysis.model_dump_json())

        assert restored == analysis
        assert restored.tickers[0].symbol == "NVIDIA"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue