feat: add Meet Kevin pydantic schemas (analysis + API shapes)

This commit is contained in:
Viktor Barzin 2026-05-21 19:06:04 +00:00
parent 8f616e6487
commit 75534de71b
2 changed files with 512 additions and 0 deletions

View file

@@ -0,0 +1,318 @@
"""Meet Kevin pipeline Pydantic schemas.
Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
StockMention, StockTimeline, TimelineBucket, PipelineHealth).
"""
from datetime import datetime
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field, field_validator
# =============================================================================
# Enums
# =============================================================================
class TickerAction(str, Enum):
    """Closed set of recommendation actions that can be attached to a ticker.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    # Declaration order is the iteration order of the enum; keep it stable.
    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
class TimeHorizon(str, Enum):
    """Closed set of time horizons an investment recommendation may refer to.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"
    # Explicit member for recommendations that carry no stated horizon.
    UNSPECIFIED = "unspecified"
class MarketOutlook(str, Enum):
    """Closed set of directions describing overall market sentiment.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
class VideoStatus(str, Enum):
    """Closed set of states a video can be in within the processing pipeline.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"
    FAILED = "failed"
    SKIPPED = "skipped"
class TranscriptSource(str, Enum):
    """Closed set of origins for a video's transcript captions.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"
    # The string "none" is a real member value, distinct from Python's None.
    NONE = "none"
# =============================================================================
# LLM Tool-Input Schemas
# =============================================================================
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.

    Validation performed by this model:

    * ``symbol`` has surrounding whitespace removed, is upper-cased, and must
      then consist of 1-6 ASCII letters (the format its description promises).
    * ``conviction`` must lie in the closed range 0.0-1.0.
    * ``video_timestamp_seconds``, when given, must not be negative.
    """

    symbol: str = Field(
        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
    )
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ..., description="Recommendation action"
    )
    conviction: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
    )
    time_horizon: Literal[
        "intraday", "days", "weeks", "months", "long_term", "unspecified"
    ] = Field(..., description="Time horizon for the recommendation")
    rationale_quote: str = Field(
        ..., description="Short verbatim or paraphrased quote from video"
    )
    # A negative offset cannot be a position inside a video, so reject it here
    # instead of emitting a broken deep link later.
    video_timestamp_seconds: int | None = Field(
        default=None, ge=0, description="Timestamp for deep-link target"
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Normalize the ticker symbol and enforce the documented format.

        Args:
            v: Raw symbol string as supplied by the caller.

        Returns:
            The symbol with surrounding whitespace removed and upper-cased.

        Raises:
            ValueError: If the normalized symbol is not 1-6 ASCII letters.
        """
        symbol = v.strip().upper()
        # The length/charset check lives here rather than in Field(pattern=...)
        # because it must see the value *after* normalization; a Field
        # constraint would reject lowercase input before it is upper-cased.
        # NOTE(review): share-class symbols such as "BRK.B" are rejected by
        # this rule, matching the field description - confirm that is intended.
        if not (1 <= len(symbol) <= 6 and symbol.isascii() and symbol.isalpha()):
            raise ValueError("symbol must be 1-6 letters A-Z")
        return symbol

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}
class MeetKevinAnalysis(BaseModel):
    """Full analysis Claude produces for one video transcript.

    Serves as the tool-input schema for the LLM analyzer and is persisted as
    kevin_analyses. The three list fields default to fresh empty lists; the
    remaining fields are required.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # --- Market outlook -------------------------------------------------
    market_outlook_direction: Literal["bullish", "neutral", "bearish", "mixed"] = Field(
        ..., description="Overall market sentiment direction"
    )
    market_outlook_reasoning: str = Field(
        ...,
        description="Explanation of market outlook",
    )

    # --- Themes and risks -----------------------------------------------
    macro_themes: list[str] = Field(
        default_factory=list,
        description="Macro economic themes discussed",
    )
    key_risks: list[str] = Field(
        default_factory=list,
        description="Key risks identified",
    )

    # --- Narrative summary and per-ticker detail ------------------------
    summary: str = Field(..., description="~200-word summary of analysis")
    tickers: list[MeetKevinTickerMention] = Field(
        default_factory=list,
        description="List of ticker mentions",
    )
# =============================================================================
# API Response Schemas
# =============================================================================
class TranscriptSegment(BaseModel):
    """One timed piece of a video transcript: start, end and the spoken text.

    All three fields are required. No ordering between ``start_seconds`` and
    ``end_seconds`` is enforced by this model.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    start_seconds: float = Field(
        ...,
        description="Segment start time in seconds",
    )
    end_seconds: float = Field(
        ...,
        description="Segment end time in seconds",
    )
    text: str = Field(
        ...,
        description="Segment text content",
    )
class VideoSummary(BaseModel):
    """Compact representation of a video, as shown in the feed.

    ``failure_reason`` defaults to ``None`` and ``ticker_count`` to ``0``;
    every other field is required.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # --- Identity and display metadata ----------------------------------
    id: int = Field(..., description="Database ID")
    youtube_video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    published_at: datetime = Field(..., description="Publication timestamp")
    thumbnail_url: str = Field(..., description="Thumbnail image URL")

    # --- Pipeline state -------------------------------------------------
    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = Field(
        ..., description="Processing status"
    )
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )
    ticker_count: int = Field(
        default=0,
        description="Number of ticker mentions analyzed",
    )
class VideoDetail(BaseModel):
    """Complete view of one video: metadata, transcript and optional analysis.

    Required fields are ``id``, ``youtube_video_id``, ``title``,
    ``published_at``, ``thumbnail_url`` and ``status``. Everything else is
    optional and defaults to ``None`` (or an empty list for
    ``transcript_segments``).
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # --- Identity and display metadata ----------------------------------
    id: int = Field(..., description="Database ID")
    youtube_video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    description: str | None = Field(default=None, description="Video description")
    published_at: datetime = Field(..., description="Publication timestamp")
    duration_seconds: int | None = Field(default=None, description="Video duration")
    thumbnail_url: str = Field(..., description="Thumbnail image URL")

    # --- Pipeline state -------------------------------------------------
    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = Field(
        ..., description="Processing status"
    )
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )

    # --- Transcript -----------------------------------------------------
    transcript_source: Literal["captions_manual", "captions_auto", "none"] | None = Field(
        default=None, description="Source of captions"
    )
    transcript_segments: list[TranscriptSegment] = Field(
        default_factory=list,
        description="Transcript segments with timing",
    )
    transcript_raw: str | None = Field(
        default=None,
        description="Full raw transcript text",
    )

    # --- LLM output -----------------------------------------------------
    analysis: MeetKevinAnalysis | None = Field(
        default=None,
        description="LLM analysis if status=analyzed",
    )
class StockMention(BaseModel):
    """One occurrence of a stock ticker being discussed in a particular video.

    Carries the identifiers of the source video alongside the recommendation
    itself. Only ``video_timestamp_seconds`` is optional (defaults to
    ``None``); ``conviction`` is constrained to the range 0.0-1.0.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # --- Source video ---------------------------------------------------
    video_id: int = Field(..., description="Database ID of video")
    youtube_video_id: str = Field(..., description="YouTube video ID for linking")
    published_at: datetime = Field(..., description="Video publication date")

    # --- Recommendation -------------------------------------------------
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ...,
        description="Recommendation action",
    )
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation",
    )
    time_horizon: Literal[
        "intraday",
        "days",
        "weeks",
        "months",
        "long_term",
        "unspecified",
    ] = Field(..., description="Time horizon for recommendation")
    rationale_quote: str = Field(..., description="Quote or summary of rationale")
    video_timestamp_seconds: int | None = Field(
        default=None,
        description="Deep-link timestamp",
    )
class StockSummary(BaseModel):
    """Aggregate view of one stock ticker over all of its mentions.

    The three sentiment counters default to ``0``; all other fields are
    required. ``avg_conviction`` is constrained to the range 0.0-1.0.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # --- Headline figures -----------------------------------------------
    symbol: str = Field(..., description="Stock ticker")
    mention_count: int = Field(..., description="Total mention count")
    last_mentioned_at: datetime = Field(..., description="Timestamp of last mention")
    latest_action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ...,
        description="Most recent recommendation",
    )
    avg_conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Average conviction across mentions",
    )

    # --- Sentiment breakdown --------------------------------------------
    bullish_count: int = Field(default=0, description="Buy + watch count")
    bearish_count: int = Field(default=0, description="Sell + avoid count")
    neutral_count: int = Field(default=0, description="Hold count")
class TimelineBucket(BaseModel):
    """Aggregated sentiment for one time bucket of a timeline.

    Only ``bucket_date`` is required. It is a plain string and its format is
    not validated by this model.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    bucket_date: str = Field(
        ...,
        description="Date string (YYYY-MM-DD or YYYY-Www)",
    )
    action: Literal["buy", "sell", "hold", "watch", "avoid"] | None = Field(
        default=None,
        description="Most common action in bucket",
    )
    avg_conviction: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Average conviction",
    )
    mention_count: int = Field(default=0, description="Count of mentions in bucket")
class StockTimeline(BaseModel):
    """History of a single stock ticker: bucketed aggregates plus raw mentions.

    ``symbol`` is required; both lists default to fresh empty lists. The model
    stores the lists as given and does not sort them.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    symbol: str = Field(..., description="Stock ticker")
    buckets: list[TimelineBucket] = Field(
        default_factory=list,
        description="Time-bucketed data",
    )
    mentions: list[StockMention] = Field(
        default_factory=list,
        description="Chronological mentions (newest first)",
    )
class PipelineHealth(BaseModel):
    """Snapshot of how the Meet Kevin pipeline is doing.

    Every field has a default, so the model can be constructed with no
    arguments. It only carries the values it is given; flags such as
    ``cost_capped`` and ``pipeline_status`` are not computed here.
    """

    # Allow building the model from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # --- Polling --------------------------------------------------------
    last_poll_at: datetime | None = Field(
        default=None,
        description="Timestamp of last RSS poll",
    )
    last_poll_age_seconds: int | None = Field(
        default=None,
        description="Seconds since last poll",
    )

    # --- Throughput counters --------------------------------------------
    videos_discovered_today: int = Field(
        default=0,
        description="Videos found in last 24h",
    )
    videos_captioned_today: int = Field(
        default=0,
        description="Videos with captions processed",
    )
    videos_analyzed_today: int = Field(
        default=0,
        description="Videos analyzed with LLM",
    )

    # --- LLM spend ------------------------------------------------------
    llm_cost_today_usd: float = Field(default=0.0, description="Total LLM cost today")
    daily_cost_cap_usd: float = Field(default=5.0, description="Daily cost limit")
    cost_capped: bool = Field(default=False, description="True if cost cap hit today")

    # --- Overall verdict ------------------------------------------------
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        default="ok",
        description="Overall health status",
    )
    status_message: str | None = Field(
        default=None,
        description="Optional status details",
    )