trading/shared/schemas/meet_kevin.py
Viktor Barzin 8a412e6ae9 fix(schemas): use enum types as field types + enforce symbol length
- Replace all Literal[...] type annotations with corresponding enum classes
  (TickerAction, TimeHorizon, MarketOutlook, VideoStatus, TranscriptSource)
  for MeetKevinTickerMention, MeetKevinAnalysis, and API response models
  (VideoSummary, VideoDetail, StockMention, StockSummary, TimelineBucket)
- Add min_length=1, max_length=10 validation to MeetKevinTickerMention.symbol
- Split test_conviction_edge_cases into two separate boundary tests
- Strengthen test_valid_ticker_mention with assertions for all 6 fields
- Trim no-information docstrings from TranscriptSegment, StockTimeline
- All 60 schema tests pass
2026-05-21 19:15:59 +00:00

306 lines
9.8 KiB
Python

"""Meet Kevin pipeline Pydantic schemas.
Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
StockMention, StockTimeline, TimelineBucket, PipelineHealth).
"""
from datetime import datetime
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field, field_validator
# =============================================================================
# Enums
# =============================================================================
class TickerAction(str, Enum):
    """Action recommendation for a stock ticker."""

    # Members also subclass ``str``, so each one compares equal to its
    # lowercase wire value (e.g. ``TickerAction.BUY == "buy"``).
    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
class TimeHorizon(str, Enum):
    """Time horizon for an investment recommendation."""

    # Declared from the shortest window to the longest.
    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"

    # Explicit member for "no horizon given"; the field that uses this enum
    # is required, so this is the non-null way to say "unknown".
    UNSPECIFIED = "unspecified"
class MarketOutlook(str, Enum):
    """Overall market sentiment direction."""

    # ``str`` mixin: every member is interchangeable with its string value.
    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
class VideoStatus(str, Enum):
    """Status of a video in the processing pipeline."""

    # NOTE(review): the first three members look like successive pipeline
    # stages (found -> captions fetched -> LLM-analyzed) -- confirm.
    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"

    # Remaining states; ``failed`` is the one the response models pair with
    # a ``failure_reason``.
    FAILED = "failed"
    SKIPPED = "skipped"
class TranscriptSource(str, Enum):
    """Source of transcript captions."""

    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"

    # A real member whose value is the string "none" -- distinct from a
    # Python ``None`` on an optional field that uses this enum.
    NONE = "none"
# =============================================================================
# LLM Tool-Input Schemas
# =============================================================================
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.
    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.
    """

    # The class docstring above is left untouched on purpose: Pydantic copies
    # it into the generated JSON schema, which is what the LLM tool receives.

    # 1-10 characters are enforced by the constraints below. The description
    # previously advertised "1-6 chars", which contradicted max_length=10 and
    # gave the LLM a tighter bound than the validator actually applies.
    symbol: str = Field(
        ...,
        min_length=1,
        max_length=10,
        description="Stock ticker symbol (A-Z, 1-10 chars, auto-uppercased)",
    )
    action: TickerAction = Field(..., description="Recommendation action")
    # Bounded to the closed interval [0.0, 1.0].
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation (0.0-1.0)",
    )
    time_horizon: TimeHorizon = Field(
        ..., description="Time horizon for the recommendation"
    )
    rationale_quote: str = Field(
        ..., description="Short verbatim or paraphrased quote from video"
    )
    # Optional: omitted/None when no position in the video is attached.
    video_timestamp_seconds: int | None = Field(
        default=None, description="Timestamp for deep-link target"
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Auto-uppercase the ticker symbol.

        Args:
            v: The ``symbol`` value handed to the validator by Pydantic.

        Returns:
            ``v`` converted with ``str.upper()``; no other normalization
            (such as trimming whitespace) is applied.
        """
        return v.upper()

    # Allows building the model from objects by reading their attributes
    # (presumably ORM rows -- verify against the persistence layer).
    model_config = {"from_attributes": True}
class MeetKevinAnalysis(BaseModel):
    """Complete analysis output from Claude for a single video transcript.
    Used as tool-input for the LLM analyzer and persisted as kevin_analyses.
    """

    # Accept attribute-bearing objects as validation input, not only dicts.
    model_config = {"from_attributes": True}

    # --- Required fields -----------------------------------------------
    market_outlook_direction: MarketOutlook = Field(
        ..., description="Overall market sentiment direction"
    )
    market_outlook_reasoning: str = Field(
        ..., description="Explanation of market outlook"
    )

    # --- Optional lists: each instance gets its own fresh empty list ----
    macro_themes: list[str] = Field(
        description="Macro economic themes discussed", default_factory=list
    )
    key_risks: list[str] = Field(
        description="Key risks identified", default_factory=list
    )

    summary: str = Field(..., description="~200-word summary of analysis")

    # Zero or more per-ticker mentions, each validated as its own model.
    tickers: list[MeetKevinTickerMention] = Field(
        description="List of ticker mentions", default_factory=list
    )
# =============================================================================
# API Response Schemas
# =============================================================================
class TranscriptSegment(BaseModel):
    # One timed span of transcript text. Documented with comments rather than
    # a docstring so the generated schema description stays unchanged.
    model_config = {"from_attributes": True}

    # Both bounds are floats in seconds; no ordering between them is enforced.
    start_seconds: float = Field(
        ..., description="Segment start time in seconds"
    )
    end_seconds: float = Field(
        ..., description="Segment end time in seconds"
    )
    text: str = Field(..., description="Segment text content")
class VideoSummary(BaseModel):
    """Summary view of a video in the feed."""

    # Accept attribute-bearing objects as validation input, not only dicts.
    model_config = {"from_attributes": True}

    # --- Identity and display metadata (all required) -------------------
    id: int = Field(..., description="Database ID")
    youtube_video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    published_at: datetime = Field(..., description="Publication timestamp")
    thumbnail_url: str = Field(..., description="Thumbnail image URL")

    # --- Pipeline state -------------------------------------------------
    status: VideoStatus = Field(..., description="Processing status")
    # Nothing ties this to ``status`` at validation time; it is simply optional.
    failure_reason: str | None = Field(
        None, description="Failure reason if status=failed"
    )
    ticker_count: int = Field(
        0, description="Number of ticker mentions analyzed"
    )
class VideoDetail(BaseModel):
    """Full details of a single video including analysis."""

    # Accept attribute-bearing objects as validation input, not only dicts.
    model_config = {"from_attributes": True}

    # --- Video metadata -------------------------------------------------
    id: int = Field(..., description="Database ID")
    youtube_video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    description: str | None = Field(None, description="Video description")
    published_at: datetime = Field(..., description="Publication timestamp")
    duration_seconds: int | None = Field(None, description="Video duration")
    thumbnail_url: str = Field(..., description="Thumbnail image URL")

    # --- Pipeline state -------------------------------------------------
    status: VideoStatus = Field(..., description="Processing status")
    failure_reason: str | None = Field(
        None, description="Failure reason if status=failed"
    )

    # --- Transcript -----------------------------------------------------
    transcript_source: TranscriptSource | None = Field(
        None, description="Source of captions"
    )
    # Defaults to a fresh empty list per instance.
    transcript_segments: list[TranscriptSegment] = Field(
        description="Transcript segments with timing", default_factory=list
    )
    transcript_raw: str | None = Field(
        None, description="Full raw transcript text"
    )

    # --- LLM output -----------------------------------------------------
    # Optional nested model; the link to ``status`` is documented, not enforced.
    analysis: MeetKevinAnalysis | None = Field(
        None, description="LLM analysis if status=analyzed"
    )
class StockMention(BaseModel):
    """Single mention of a stock ticker in a video."""

    # Accept attribute-bearing objects as validation input, not only dicts.
    model_config = {"from_attributes": True}

    # --- Which video the mention came from ------------------------------
    video_id: int = Field(..., description="Database ID of video")
    youtube_video_id: str = Field(
        ..., description="YouTube video ID for linking"
    )
    published_at: datetime = Field(..., description="Video publication date")

    # --- The recommendation itself --------------------------------------
    action: TickerAction = Field(..., description="Recommendation action")
    # Bounded to the closed interval [0.0, 1.0].
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation",
    )
    time_horizon: TimeHorizon = Field(
        ..., description="Time horizon for recommendation"
    )
    rationale_quote: str = Field(
        ..., description="Quote or summary of rationale"
    )
    video_timestamp_seconds: int | None = Field(
        None, description="Deep-link timestamp"
    )
class StockSummary(BaseModel):
    # Aggregate view of one ticker across its mentions. Documented with
    # comments rather than a docstring so the generated schema description
    # stays unchanged.
    model_config = {"from_attributes": True}

    symbol: str = Field(..., description="Stock ticker")
    mention_count: int = Field(..., description="Total mention count")
    last_mentioned_at: datetime = Field(
        ..., description="Timestamp of last mention"
    )
    latest_action: TickerAction = Field(
        ..., description="Most recent recommendation"
    )
    # Bounded to the closed interval [0.0, 1.0].
    avg_conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Average conviction across mentions",
    )

    # Per the descriptions, actions are bucketed as: buy/watch -> bullish,
    # sell/avoid -> bearish, hold -> neutral. The counts are supplied by the
    # caller; this model does not compute or cross-check them.
    bullish_count: int = Field(0, description="Buy + watch count")
    bearish_count: int = Field(0, description="Sell + avoid count")
    neutral_count: int = Field(0, description="Hold count")
class TimelineBucket(BaseModel):
    # One time bucket of aggregated mentions. Documented with comments rather
    # than a docstring so the generated schema description stays unchanged.
    model_config = {"from_attributes": True}

    # Plain string; the two formats in the description are not validated here.
    bucket_date: str = Field(
        ..., description="Date string (YYYY-MM-DD or YYYY-Www)"
    )
    action: TickerAction | None = Field(
        None, description="Most common action in bucket"
    )
    # Bounded to the closed interval [0.0, 1.0]; defaults to 0.0.
    avg_conviction: float = Field(
        0.0,
        ge=0.0,
        le=1.0,
        description="Average conviction",
    )
    mention_count: int = Field(0, description="Count of mentions in bucket")
class StockTimeline(BaseModel):
    # Timeline payload for one ticker: aggregated buckets plus the individual
    # mentions. Documented with comments rather than a docstring so the
    # generated schema description stays unchanged.
    model_config = {"from_attributes": True}

    symbol: str = Field(..., description="Stock ticker")

    # Both lists default to a fresh empty list per instance. The ordering
    # named in the ``mentions`` description is the producer's job; nothing
    # here sorts or checks it.
    buckets: list[TimelineBucket] = Field(
        description="Time-bucketed data", default_factory=list
    )
    mentions: list[StockMention] = Field(
        description="Chronological mentions (newest first)",
        default_factory=list,
    )
class PipelineHealth(BaseModel):
    """Health status of the Meet Kevin pipeline."""

    # Accept attribute-bearing objects as validation input, not only dicts.
    model_config = {"from_attributes": True}

    # --- Polling ----------------------------------------------------------
    last_poll_at: datetime | None = Field(
        None, description="Timestamp of last RSS poll"
    )
    # Supplied by the caller; not derived from ``last_poll_at`` in this model.
    last_poll_age_seconds: int | None = Field(
        None, description="Seconds since last poll"
    )

    # --- Throughput counters (default to zero) ----------------------------
    videos_discovered_today: int = Field(
        0, description="Videos found in last 24h"
    )
    videos_captioned_today: int = Field(
        0, description="Videos with captions processed"
    )
    videos_analyzed_today: int = Field(
        0, description="Videos analyzed with LLM"
    )

    # --- LLM spend --------------------------------------------------------
    llm_cost_today_usd: float = Field(0.0, description="Total LLM cost today")
    daily_cost_cap_usd: float = Field(5.0, description="Daily cost limit")
    # Plain flag set by the caller; not computed from the two amounts above.
    cost_capped: bool = Field(
        False, description="True if cost cap hit today"
    )

    # --- Overall verdict --------------------------------------------------
    # NOTE(review): this is the one closed value set in the module still
    # typed as a Literal instead of a str-Enum -- confirm that is intended.
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        "ok", description="Overall health status"
    )
    status_message: str | None = Field(
        None, description="Optional status details"
    )