feat: add Meet Kevin pydantic schemas (analysis + API shapes)
This commit is contained in:
parent
8f616e6487
commit
75534de71b
2 changed files with 512 additions and 0 deletions
318
shared/schemas/meet_kevin.py
Normal file
318
shared/schemas/meet_kevin.py
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
"""Meet Kevin pipeline Pydantic schemas.
|
||||
|
||||
Includes LLM tool-input schemas (MeetKevinTickerMention, MeetKevinAnalysis)
|
||||
and API response shapes (TranscriptSegment, VideoSummary, VideoDetail, StockSummary,
|
||||
StockMention, StockTimeline, TimelineBucket, PipelineHealth).
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Enums
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TickerAction(str, Enum):
    """Recommended action for a stock ticker.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
|
||||
|
||||
|
||||
class TimeHorizon(str, Enum):
    """Time horizon attached to an investment recommendation.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"
    UNSPECIFIED = "unspecified"
|
||||
|
||||
|
||||
class MarketOutlook(str, Enum):
    """Direction of overall market sentiment.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
|
||||
|
||||
|
||||
class VideoStatus(str, Enum):
    """Processing-pipeline status of a video.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"
    FAILED = "failed"
    SKIPPED = "skipped"
|
||||
|
||||
|
||||
class TranscriptSource(str, Enum):
    """Where a transcript's captions came from.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase string value.
    """

    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"
    NONE = "none"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LLM Tool-Input Schemas
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.

    ``symbol`` is normalized (surrounding whitespace stripped, then uppercased)
    and checked against the format advertised in its field description:
    1-6 ASCII letters. Any other value fails validation.
    """

    symbol: str = Field(
        ..., description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)"
    )
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ..., description="Recommendation action"
    )
    conviction: float = Field(
        ..., ge=0.0, le=1.0, description="Confidence in recommendation (0.0-1.0)"
    )
    time_horizon: Literal[
        "intraday", "days", "weeks", "months", "long_term", "unspecified"
    ] = Field(..., description="Time horizon for the recommendation")
    rationale_quote: str = Field(
        ..., description="Short verbatim or paraphrased quote from video"
    )
    video_timestamp_seconds: int | None = Field(
        default=None, description="Timestamp for deep-link target"
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Normalize the ticker symbol and enforce its documented format.

        Args:
            v: Raw symbol string as supplied by the caller.

        Returns:
            The symbol with surrounding whitespace removed and uppercased.

        Raises:
            ValueError: If the normalized symbol is not 1-6 ASCII letters.
                Pydantic reports this as a validation error on ``symbol``.
        """
        normalized = v.strip().upper()
        # The field description promises "A-Z, 1-6 chars"; previously only the
        # uppercasing was applied, so "", "  ", "123" or over-long strings
        # passed through unchecked.
        # NOTE(review): this rejects dotted/hyphenated tickers such as "BRK.B";
        # confirm that matches the intended contract before widening it.
        is_valid = (
            1 <= len(normalized) <= 6
            and normalized.isascii()
            and normalized.isalpha()
        )
        if not is_valid:
            raise ValueError(f"symbol must be 1-6 letters A-Z, got {v!r}")
        return normalized

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class MeetKevinAnalysis(BaseModel):
    """Full analysis produced by Claude for one video transcript.

    Serves as the tool-input schema for the LLM analyzer and is persisted
    as kevin_analyses.
    """

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    # --- Market outlook -----------------------------------------------------
    market_outlook_direction: Literal["bullish", "neutral", "bearish", "mixed"] = Field(
        ...,
        description="Overall market sentiment direction",
    )
    market_outlook_reasoning: str = Field(
        ...,
        description="Explanation of market outlook",
    )

    # --- Themes and risks (both default to an empty list) ------------------
    macro_themes: list[str] = Field(
        default_factory=list,
        description="Macro economic themes discussed",
    )
    key_risks: list[str] = Field(
        default_factory=list,
        description="Key risks identified",
    )

    # --- Summary and per-ticker detail -------------------------------------
    summary: str = Field(
        ...,
        description="~200-word summary of analysis",
    )
    tickers: list[MeetKevinTickerMention] = Field(
        default_factory=list,
        description="List of ticker mentions",
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Response Schemas
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TranscriptSegment(BaseModel):
    """One timed segment of a video transcript."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    start_seconds: float = Field(
        ...,
        description="Segment start time in seconds",
    )
    end_seconds: float = Field(
        ...,
        description="Segment end time in seconds",
    )
    text: str = Field(
        ...,
        description="Segment text content",
    )
|
||||
|
||||
|
||||
class VideoSummary(BaseModel):
    """Compact view of a video as shown in the feed."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    # --- Identity and display metadata -------------------------------------
    id: int = Field(
        ...,
        description="Database ID",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID",
    )
    title: str = Field(
        ...,
        description="Video title",
    )
    published_at: datetime = Field(
        ...,
        description="Publication timestamp",
    )
    thumbnail_url: str = Field(
        ...,
        description="Thumbnail image URL",
    )

    # --- Pipeline state -----------------------------------------------------
    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = Field(
        ...,
        description="Processing status",
    )
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )
    ticker_count: int = Field(
        default=0,
        description="Number of ticker mentions analyzed",
    )
|
||||
|
||||
|
||||
class VideoDetail(BaseModel):
    """Complete record for one video, including transcript and analysis."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    # --- Identity and display metadata -------------------------------------
    id: int = Field(
        ...,
        description="Database ID",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID",
    )
    title: str = Field(
        ...,
        description="Video title",
    )
    description: str | None = Field(
        default=None,
        description="Video description",
    )
    published_at: datetime = Field(
        ...,
        description="Publication timestamp",
    )
    duration_seconds: int | None = Field(
        default=None,
        description="Video duration",
    )
    thumbnail_url: str = Field(
        ...,
        description="Thumbnail image URL",
    )

    # --- Pipeline state -----------------------------------------------------
    status: Literal["discovered", "captioned", "analyzed", "failed", "skipped"] = Field(
        ...,
        description="Processing status",
    )
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )

    # --- Transcript ---------------------------------------------------------
    transcript_source: Literal["captions_manual", "captions_auto", "none"] | None = Field(
        default=None,
        description="Source of captions",
    )
    transcript_segments: list[TranscriptSegment] = Field(
        default_factory=list,
        description="Transcript segments with timing",
    )
    transcript_raw: str | None = Field(
        default=None,
        description="Full raw transcript text",
    )

    # --- LLM analysis -------------------------------------------------------
    analysis: MeetKevinAnalysis | None = Field(
        default=None,
        description="LLM analysis if status=analyzed",
    )
|
||||
|
||||
|
||||
class StockMention(BaseModel):
    """One mention of a stock ticker within a specific video."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    # --- Source video -------------------------------------------------------
    video_id: int = Field(
        ...,
        description="Database ID of video",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID for linking",
    )
    published_at: datetime = Field(
        ...,
        description="Video publication date",
    )

    # --- Recommendation -----------------------------------------------------
    action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ...,
        description="Recommendation action",
    )
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation",
    )
    time_horizon: Literal[
        "intraday",
        "days",
        "weeks",
        "months",
        "long_term",
        "unspecified",
    ] = Field(
        ...,
        description="Time horizon for recommendation",
    )
    rationale_quote: str = Field(
        ...,
        description="Quote or summary of rationale",
    )
    video_timestamp_seconds: int | None = Field(
        default=None,
        description="Deep-link timestamp",
    )
|
||||
|
||||
|
||||
class StockSummary(BaseModel):
    """Aggregate view of one stock across all of its mentions."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    # --- Identity and recency ----------------------------------------------
    symbol: str = Field(
        ...,
        description="Stock ticker",
    )
    mention_count: int = Field(
        ...,
        description="Total mention count",
    )
    last_mentioned_at: datetime = Field(
        ...,
        description="Timestamp of last mention",
    )
    latest_action: Literal["buy", "sell", "hold", "watch", "avoid"] = Field(
        ...,
        description="Most recent recommendation",
    )

    # --- Aggregates ---------------------------------------------------------
    avg_conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Average conviction across mentions",
    )
    bullish_count: int = Field(
        default=0,
        description="Buy + watch count",
    )
    bearish_count: int = Field(
        default=0,
        description="Sell + avoid count",
    )
    neutral_count: int = Field(
        default=0,
        description="Hold count",
    )
|
||||
|
||||
|
||||
class TimelineBucket(BaseModel):
    """One time bucket of a sentiment timeline."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    bucket_date: str = Field(
        ...,
        description="Date string (YYYY-MM-DD or YYYY-Www)",
    )
    # Every remaining field has a default, so only bucket_date is required.
    action: Literal["buy", "sell", "hold", "watch", "avoid"] | None = Field(
        default=None,
        description="Most common action in bucket",
    )
    avg_conviction: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Average conviction",
    )
    mention_count: int = Field(
        default=0,
        description="Count of mentions in bucket",
    )
|
||||
|
||||
|
||||
class StockTimeline(BaseModel):
    """Mention timeline for one stock ticker."""

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    symbol: str = Field(
        ...,
        description="Stock ticker",
    )
    # Both collections default to an empty list when omitted.
    buckets: list[TimelineBucket] = Field(
        default_factory=list,
        description="Time-bucketed data",
    )
    mentions: list[StockMention] = Field(
        default_factory=list,
        description="Chronological mentions (newest first)",
    )
|
||||
|
||||
|
||||
class PipelineHealth(BaseModel):
    """Health snapshot of the Meet Kevin pipeline.

    Every field has a default, so the model can be built with no arguments.
    """

    # Allow the model to be validated from objects' attributes as well as dicts.
    model_config = {"from_attributes": True}

    # --- Polling ------------------------------------------------------------
    last_poll_at: datetime | None = Field(
        default=None,
        description="Timestamp of last RSS poll",
    )
    last_poll_age_seconds: int | None = Field(
        default=None,
        description="Seconds since last poll",
    )

    # --- Throughput counters ------------------------------------------------
    videos_discovered_today: int = Field(
        default=0,
        description="Videos found in last 24h",
    )
    videos_captioned_today: int = Field(
        default=0,
        description="Videos with captions processed",
    )
    videos_analyzed_today: int = Field(
        default=0,
        description="Videos analyzed with LLM",
    )

    # --- LLM spend ----------------------------------------------------------
    llm_cost_today_usd: float = Field(
        default=0.0,
        description="Total LLM cost today",
    )
    daily_cost_cap_usd: float = Field(
        default=5.0,
        description="Daily cost limit",
    )
    cost_capped: bool = Field(
        default=False,
        description="True if cost cap hit today",
    )

    # --- Overall status -----------------------------------------------------
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        default="ok",
        description="Overall health status",
    )
    status_message: str | None = Field(
        default=None,
        description="Optional status details",
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue