- Replace all Literal[...] type annotations with corresponding enum classes (TickerAction, TimeHorizon, MarketOutlook, VideoStatus, TranscriptSource) for MeetKevinTickerMention, MeetKevinAnalysis, and API response models (VideoSummary, VideoDetail, StockMention, StockSummary, TimelineBucket)
- Add min_length=1, max_length=10 validation to MeetKevinTickerMention.symbol
- Split test_conviction_edge_cases into two separate boundary tests
- Strengthen test_valid_ticker_mention with assertions for all 6 fields
- Trim no-information docstrings from TranscriptSegment, StockTimeline
- All 60 schema tests pass
306 lines
9.8 KiB
Python
306 lines
9.8 KiB
Python
"""Meet Kevin pipeline Pydantic schemas.
|
|
|
|
Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
|
|
and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
|
|
StockMention, StockTimeline, TimelineBucket, PipelineHealth).
|
|
"""
|
|
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Literal
|
|
|
|
from pydantic import BaseModel, Field, field_validator
|
|
|
|
|
|
# =============================================================================
|
|
# Enums
|
|
# =============================================================================
|
|
|
|
|
|
class TickerAction(str, Enum):
    """Action recommendation for a stock ticker."""

    # The str mixin makes every member compare equal to its lowercase string
    # value (TickerAction.BUY == "buy"), so plain-string callers keep working.
    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
|
|
|
|
|
|
class TimeHorizon(str, Enum):
    """Time horizon for an investment recommendation."""

    # Members are str subclasses and compare equal to their lowercase values.
    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"
    # NOTE(review): presumably the fallback when no horizon is stated in the
    # video; confirm against the analyzer prompt.
    UNSPECIFIED = "unspecified"
|
|
|
|
|
|
class MarketOutlook(str, Enum):
    """Overall market sentiment direction."""

    # Members are str subclasses and compare equal to their lowercase values.
    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
|
|
|
|
|
|
class VideoStatus(str, Enum):
    """Status of a video in the processing pipeline."""

    # Members are str subclasses and compare equal to their lowercase values.
    # NOTE(review): the first three look like successive pipeline stages
    # (discovered -> captioned -> analyzed); confirm against the pipeline code.
    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"
    FAILED = "failed"
    SKIPPED = "skipped"
|
|
|
|
|
|
class TranscriptSource(str, Enum):
    """Source of transcript captions."""

    # Members are str subclasses and compare equal to their lowercase values.
    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"
    # The member below is the string "none"; it is not Python's None, which
    # VideoDetail.transcript_source also accepts for a missing source.
    NONE = "none"
|
|
|
|
|
|
# =============================================================================
|
|
# LLM Tool-Input Schemas
|
|
# =============================================================================
|
|
|
|
|
|
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.
    """

    # The description states exactly what is enforced: a 1-10 character string
    # that the validator below upper-cases. No character-class check (A-Z only)
    # is performed, so the description must not promise one.
    symbol: str = Field(
        ...,
        min_length=1,
        max_length=10,
        description="Stock ticker symbol (1-10 chars, auto-uppercased)",
    )
    action: TickerAction = Field(..., description="Recommendation action")
    # Both bounds are inclusive: 0.0 and 1.0 are valid convictions.
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation (0.0-1.0)",
    )
    time_horizon: TimeHorizon = Field(..., description="Time horizon for the recommendation")
    rationale_quote: str = Field(
        ...,
        description="Short verbatim or paraphrased quote from video",
    )
    # Optional; None when no timestamp is available for the mention.
    video_timestamp_seconds: int | None = Field(
        default=None,
        description="Timestamp for deep-link target",
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Auto-uppercase the ticker symbol."""
        return v.upper()

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class MeetKevinAnalysis(BaseModel):
    """Complete analysis output from Claude for a single video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_analyses.
    """

    # Required fields: outlook direction, outlook reasoning and summary.
    # The list fields default to empty lists via default_factory, so each
    # instance gets its own list.
    market_outlook_direction: MarketOutlook = Field(
        ...,
        description="Overall market sentiment direction",
    )
    market_outlook_reasoning: str = Field(
        ...,
        description="Explanation of market outlook",
    )
    macro_themes: list[str] = Field(
        default_factory=list,
        description="Macro economic themes discussed",
    )
    key_risks: list[str] = Field(
        default_factory=list,
        description="Key risks identified",
    )
    # The "~200-word" target is descriptive only; no length is enforced here.
    summary: str = Field(..., description="~200-word summary of analysis")
    tickers: list[MeetKevinTickerMention] = Field(
        default_factory=list,
        description="List of ticker mentions",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
# =============================================================================
|
|
# API Response Schemas
|
|
# =============================================================================
|
|
|
|
|
|
class TranscriptSegment(BaseModel):
    # One timed span of transcript text. Times are floats in seconds; no
    # ordering between start_seconds and end_seconds is validated here.
    start_seconds: float = Field(..., description="Segment start time in seconds")
    end_seconds: float = Field(..., description="Segment end time in seconds")
    text: str = Field(..., description="Segment text content")

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class VideoSummary(BaseModel):
    """Summary view of a video in the feed."""

    id: int = Field(..., description="Database ID")
    youtube_video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    published_at: datetime = Field(..., description="Publication timestamp")
    thumbnail_url: str = Field(..., description="Thumbnail image URL")
    status: VideoStatus = Field(..., description="Processing status")
    # Optional; the model does not enforce that it is set only when
    # status is "failed".
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )
    ticker_count: int = Field(
        default=0,
        description="Number of ticker mentions analyzed",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class VideoDetail(BaseModel):
    """Full details of a single video including analysis."""

    id: int = Field(..., description="Database ID")
    youtube_video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    description: str | None = Field(default=None, description="Video description")
    published_at: datetime = Field(..., description="Publication timestamp")
    duration_seconds: int | None = Field(default=None, description="Video duration")
    thumbnail_url: str = Field(..., description="Thumbnail image URL")
    status: VideoStatus = Field(..., description="Processing status")
    # Optional; the model does not enforce that it is set only when
    # status is "failed".
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )
    # May be missing (None) or the explicit TranscriptSource.NONE member;
    # the two are distinct values.
    transcript_source: TranscriptSource | None = Field(
        default=None,
        description="Source of captions",
    )
    transcript_segments: list[TranscriptSegment] = Field(
        default_factory=list,
        description="Transcript segments with timing",
    )
    transcript_raw: str | None = Field(
        default=None,
        description="Full raw transcript text",
    )
    # Optional; the model does not enforce that it is present only when
    # status is "analyzed".
    analysis: MeetKevinAnalysis | None = Field(
        default=None,
        description="LLM analysis if status=analyzed",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class StockMention(BaseModel):
    """Single mention of a stock ticker in a video."""

    video_id: int = Field(..., description="Database ID of video")
    youtube_video_id: str = Field(..., description="YouTube video ID for linking")
    published_at: datetime = Field(..., description="Video publication date")
    action: TickerAction = Field(..., description="Recommendation action")
    # Both bounds are inclusive: 0.0 and 1.0 are valid convictions.
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation",
    )
    time_horizon: TimeHorizon = Field(..., description="Time horizon for recommendation")
    rationale_quote: str = Field(
        ...,
        description="Quote or summary of rationale",
    )
    # Optional; None when no timestamp is available for the mention.
    video_timestamp_seconds: int | None = Field(
        default=None,
        description="Deep-link timestamp",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class StockSummary(BaseModel):
    # Aggregate view of one ticker across its mentions. The three *_count
    # fields bucket TickerAction values as their descriptions state:
    # bullish = buy + watch, bearish = sell + avoid, neutral = hold.
    # The model does not check that the buckets sum to mention_count.
    symbol: str = Field(..., description="Stock ticker")
    mention_count: int = Field(..., description="Total mention count")
    last_mentioned_at: datetime = Field(
        ...,
        description="Timestamp of last mention",
    )
    latest_action: TickerAction = Field(
        ...,
        description="Most recent recommendation",
    )
    # Both bounds are inclusive: 0.0 and 1.0 are valid averages.
    avg_conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Average conviction across mentions",
    )
    bullish_count: int = Field(default=0, description="Buy + watch count")
    bearish_count: int = Field(default=0, description="Sell + avoid count")
    neutral_count: int = Field(default=0, description="Hold count")

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class TimelineBucket(BaseModel):
    # One time bucket of a stock timeline. bucket_date is a plain string; the
    # two formats named in its description are not validated here.
    bucket_date: str = Field(..., description="Date string (YYYY-MM-DD or YYYY-Www)")
    # None is allowed, e.g. for a bucket with no dominant action.
    # NOTE(review): confirm when the producer actually emits None.
    action: TickerAction | None = Field(
        default=None,
        description="Most common action in bucket",
    )
    # Both bounds are inclusive; defaults to 0.0.
    avg_conviction: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Average conviction",
    )
    mention_count: int = Field(
        default=0,
        description="Count of mentions in bucket",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class StockTimeline(BaseModel):
    # Timeline payload for one ticker: aggregated buckets plus the individual
    # mentions. Both lists default to empty. The "newest first" ordering of
    # mentions is a contract on the producer; this model does not sort.
    symbol: str = Field(..., description="Stock ticker")
    buckets: list[TimelineBucket] = Field(
        default_factory=list,
        description="Time-bucketed data",
    )
    mentions: list[StockMention] = Field(
        default_factory=list,
        description="Chronological mentions (newest first)",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
|
|
|
|
class PipelineHealth(BaseModel):
    """Health status of the Meet Kevin pipeline."""

    # Every field has a default, so PipelineHealth() is a valid "ok" report
    # with zeroed counters.
    last_poll_at: datetime | None = Field(
        default=None,
        description="Timestamp of last RSS poll",
    )
    last_poll_age_seconds: int | None = Field(
        default=None,
        description="Seconds since last poll",
    )
    videos_discovered_today: int = Field(
        default=0,
        description="Videos found in last 24h",
    )
    videos_captioned_today: int = Field(
        default=0,
        description="Videos with captions processed",
    )
    videos_analyzed_today: int = Field(
        default=0,
        description="Videos analyzed with LLM",
    )
    llm_cost_today_usd: float = Field(
        default=0.0,
        description="Total LLM cost today",
    )
    # NOTE(review): 5.0 is only this schema's default; presumably the real cap
    # comes from configuration. Confirm the two cannot drift apart.
    daily_cost_cap_usd: float = Field(
        default=5.0,
        description="Daily cost limit",
    )
    cost_capped: bool = Field(
        default=False,
        description="True if cost cap hit today",
    )
    # NOTE(review): still a Literal while the other closed value sets in this
    # module are str enums. Kept as-is because an enum would change what
    # callers get back from str() and model_dump().
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        default="ok",
        description="Overall health status",
    )
    status_message: str | None = Field(
        default=None,
        description="Optional status details",
    )

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|