trading/shared/schemas/meet_kevin.py
Viktor Barzin 41ab95ec4d
Some checks failed
ci/woodpecker/push/woodpecker Pipeline was canceled
feat(meet-kevin): prompt v2 — forward-looking action + expected_move field
User reported that the old prompt could emit 'sell' on backward-looking
capitulation ('Kevin sold after a 20% drop') — exactly the false signal
to avoid. v2 reframes every per-ticker field as forward-looking and
adds an explicit expected_move enum for the trading bot to weight.

Changes:
- New ExpectedMove enum (up_strong/up_mild/sideways/down_mild/
  down_strong/unknown) in shared/schemas + shared/models, with
  matching kevin_expected_move Postgres enum + column on
  kevin_stock_mentions (migration e5f6a7b8c9d0). NOT NULL with
  server_default 'unknown' so existing rows backfill cleanly.
- SYSTEM_PROMPT rewritten: action semantics now require a FORWARD
  view; reactive sells get downgraded to 'watch' or skipped; the
  rationale_quote must contain forward reasoning. Quality
  checklist updated.
- _ANALYSIS_TOOL JSON schema gains expected_move (required).
- prompt_version v1 → v2 in config + infra + ad-hoc CLI default.
- pipeline.py persists ticker.expected_move into the new column.

Migration safety: the column is NOT NULL DEFAULT 'unknown' so 96
existing mentions auto-fill with 'unknown' (no forward call known
for backward analyses) without breaking any reads.

Cost to backfill the 27 existing analyses with v2 prompt: ~$3 LLM
spend. A follow-up reanalyze script will replay them after this
ships.
2026-05-28 21:40:07 +00:00

327 lines
11 KiB
Python

"""Meet Kevin pipeline Pydantic schemas.
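
Includes the shared enums (TickerAction, TimeHorizon, MarketOutlook, ExpectedMove,
VideoStatus, TranscriptSource), the LLM tool-input schemas (MeetKevinTickerMention,
MeetKevinAnalysis), and the API response shapes (TranscriptSegment, VideoSummary,
VideoDetail, StockSummary, StockMention, StockTimeline, TimelineBucket, PipelineHealth).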
"""
from datetime import datetime
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field, field_validator
# =============================================================================
# Enums
# =============================================================================
class TickerAction(str, Enum):
    """Recommended action attached to a stock ticker mention."""

    # Members mix in ``str``, so each one compares equal to its lowercase value.
    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
    AVOID = "avoid"
class TimeHorizon(str, Enum):
    """Period over which an investment recommendation is meant to apply."""

    INTRADAY = "intraday"
    DAYS = "days"
    WEEKS = "weeks"
    MONTHS = "months"
    LONG_TERM = "long_term"
    # Fallback member for recommendations that carry no stated horizon.
    UNSPECIFIED = "unspecified"
class MarketOutlook(str, Enum):
    """Direction of the overall market sentiment."""

    BULLISH = "bullish"
    NEUTRAL = "neutral"
    BEARISH = "bearish"
    MIXED = "mixed"
class ExpectedMove(str, Enum):
    """Forward-looking directional view on a ticker over its time_horizon.

    Kept separate from `action`: action captures what Kevin recommends doing,
    while expected_move captures where the LLM thinks the stock is heading
    next. The bridge should weight this field when deciding whether to
    paper-trade.
    """

    # The percentage bands below are annotations only; nothing in this enum
    # enforces them, and which band owns an exact boundary value is not stated.
    UP_STRONG = "up_strong"  # >= +5% over the horizon
    UP_MILD = "up_mild"  # +1% to +5%
    SIDEWAYS = "sideways"  # -1% to +1%
    DOWN_MILD = "down_mild"  # -5% to -1%
    DOWN_STRONG = "down_strong"  # <= -5%
    UNKNOWN = "unknown"  # Kevin made no directional call
class VideoStatus(str, Enum):
    """Processing state of a video within the pipeline."""

    DISCOVERED = "discovered"
    CAPTIONED = "captioned"
    ANALYZED = "analyzed"
    FAILED = "failed"
    SKIPPED = "skipped"
class TranscriptSource(str, Enum):
    """Where a video's transcript captions came from."""

    CAPTIONS_MANUAL = "captions_manual"
    CAPTIONS_AUTO = "captions_auto"
    NONE = "none"
# =============================================================================
# LLM Tool-Input Schemas
# =============================================================================
class MeetKevinTickerMention(BaseModel):
    """Single stock ticker mention extracted by Claude from a video transcript.

    Used as tool-input for the LLM analyzer and persisted as kevin_stock_mentions.
    """

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # NOTE(review): the description advertises "A-Z, 1-6 chars", but the only
    # constraints enforced here are the 1-10 length bounds; the character set
    # is not checked. Confirm which limit is intended before tightening either.
    symbol: str = Field(
        ...,
        min_length=1,
        max_length=10,
        description="Stock ticker symbol (A-Z, 1-6 chars, auto-uppercased)",
    )
    action: TickerAction = Field(
        ...,
        description="Recommendation action",
    )
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation (0.0-1.0)",
    )
    time_horizon: TimeHorizon = Field(
        ...,
        description="Time horizon for the recommendation",
    )
    rationale_quote: str = Field(
        ...,
        description="Short verbatim or paraphrased quote from video",
    )
    video_timestamp_seconds: int | None = Field(
        default=None,
        description="Timestamp for deep-link target",
    )
    # Defaults to UNKNOWN so inputs that omit the field still validate.
    expected_move: ExpectedMove = Field(
        default=ExpectedMove.UNKNOWN,
        description="Forward-looking directional view over time_horizon",
    )

    @field_validator("symbol")
    @classmethod
    def uppercase_symbol(cls, v: str) -> str:
        """Normalize the ticker symbol: trim surrounding whitespace, then uppercase.

        Without the trim, " aapl" would be stored as " AAPL" and treated as a
        different symbol from "AAPL". This validator uses the default mode, so
        the length bounds on the field are checked against the raw input
        before the trim happens.

        Args:
            v: The symbol after the field's type and length validation.

        Returns:
            The trimmed, uppercased symbol.

        Raises:
            ValueError: If the symbol is blank once whitespace is removed
                (a whitespace-only value would otherwise satisfy min_length).
        """
        trimmed = v.strip()
        if not trimmed:
            raise ValueError("symbol must not be blank")
        return trimmed.upper()
class MeetKevinAnalysis(BaseModel):
    """Whole-video analysis returned by Claude for one transcript.

    Serves as tool-input for the LLM analyzer and is persisted as kevin_analyses.
    """

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # Market-level view.
    market_outlook_direction: MarketOutlook = Field(
        ...,
        description="Overall market sentiment direction",
    )
    market_outlook_reasoning: str = Field(
        ...,
        description="Explanation of market outlook",
    )

    # Optional lists; each instance gets its own fresh empty list by default.
    macro_themes: list[str] = Field(
        default_factory=list,
        description="Macro economic themes discussed",
    )
    key_risks: list[str] = Field(
        default_factory=list,
        description="Key risks identified",
    )

    summary: str = Field(
        ...,
        description="~200-word summary of analysis",
    )

    # Per-ticker breakdown; empty when no tickers were extracted.
    tickers: list[MeetKevinTickerMention] = Field(
        default_factory=list,
        description="List of ticker mentions",
    )
# =============================================================================
# API Response Schemas
# =============================================================================
class TranscriptSegment(BaseModel):
    """One timed slice of a video transcript."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    start_seconds: float = Field(
        ...,
        description="Segment start time in seconds",
    )
    end_seconds: float = Field(
        ...,
        description="Segment end time in seconds",
    )
    text: str = Field(
        ...,
        description="Segment text content",
    )
class VideoSummary(BaseModel):
    """Condensed representation of a video for the feed listing."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # Identity and display metadata.
    id: int = Field(
        ...,
        description="Database ID",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID",
    )
    title: str = Field(
        ...,
        description="Video title",
    )
    published_at: datetime = Field(
        ...,
        description="Publication timestamp",
    )
    thumbnail_url: str = Field(
        ...,
        description="Thumbnail image URL",
    )

    # Pipeline state.
    status: VideoStatus = Field(
        ...,
        description="Processing status",
    )
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )
    ticker_count: int = Field(
        default=0,
        description="Number of ticker mentions analyzed",
    )
class VideoDetail(BaseModel):
    """Complete view of one video, including its transcript and analysis."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # Identity and display metadata.
    id: int = Field(
        ...,
        description="Database ID",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID",
    )
    title: str = Field(
        ...,
        description="Video title",
    )
    description: str | None = Field(
        default=None,
        description="Video description",
    )
    published_at: datetime = Field(
        ...,
        description="Publication timestamp",
    )
    duration_seconds: int | None = Field(
        default=None,
        description="Video duration",
    )
    thumbnail_url: str = Field(
        ...,
        description="Thumbnail image URL",
    )

    # Pipeline state.
    status: VideoStatus = Field(
        ...,
        description="Processing status",
    )
    failure_reason: str | None = Field(
        default=None,
        description="Failure reason if status=failed",
    )

    # Transcript payload; every field here is optional.
    transcript_source: TranscriptSource | None = Field(
        default=None,
        description="Source of captions",
    )
    transcript_segments: list[TranscriptSegment] = Field(
        default_factory=list,
        description="Transcript segments with timing",
    )
    transcript_raw: str | None = Field(
        default=None,
        description="Full raw transcript text",
    )

    # LLM output; defaults to None when no analysis is supplied.
    analysis: MeetKevinAnalysis | None = Field(
        default=None,
        description="LLM analysis if status=analyzed",
    )
class StockMention(BaseModel):
    """One occurrence of a stock ticker being discussed in a video."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # Which video the mention belongs to.
    video_id: int = Field(
        ...,
        description="Database ID of video",
    )
    youtube_video_id: str = Field(
        ...,
        description="YouTube video ID for linking",
    )
    published_at: datetime = Field(
        ...,
        description="Video publication date",
    )

    # The recommendation itself.
    action: TickerAction = Field(
        ...,
        description="Recommendation action",
    )
    conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence in recommendation",
    )
    time_horizon: TimeHorizon = Field(
        ...,
        description="Time horizon for recommendation",
    )
    rationale_quote: str = Field(
        ...,
        description="Quote or summary of rationale",
    )
    video_timestamp_seconds: int | None = Field(
        default=None,
        description="Deep-link timestamp",
    )
class StockSummary(BaseModel):
    """Aggregate statistics for one ticker across all of its mentions."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    symbol: str = Field(
        ...,
        description="Stock ticker",
    )
    mention_count: int = Field(
        ...,
        description="Total mention count",
    )
    last_mentioned_at: datetime = Field(
        ...,
        description="Timestamp of last mention",
    )
    latest_action: TickerAction = Field(
        ...,
        description="Most recent recommendation",
    )
    avg_conviction: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Average conviction across mentions",
    )

    # Sentiment tallies; each defaults to zero when not supplied.
    bullish_count: int = Field(
        default=0,
        description="Buy + watch count",
    )
    bearish_count: int = Field(
        default=0,
        description="Sell + avoid count",
    )
    neutral_count: int = Field(
        default=0,
        description="Hold count",
    )
class TimelineBucket(BaseModel):
    """Aggregated mention data for a single time bucket of a ticker timeline."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # Plain string: the date format is only documented, not validated here.
    bucket_date: str = Field(
        ...,
        description="Date string (YYYY-MM-DD or YYYY-Www)",
    )
    action: TickerAction | None = Field(
        default=None,
        description="Most common action in bucket",
    )
    avg_conviction: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Average conviction",
    )
    mention_count: int = Field(
        default=0,
        description="Count of mentions in bucket",
    )
class StockTimeline(BaseModel):
    """Timeline for one ticker: bucketed aggregates plus individual mentions."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    symbol: str = Field(
        ...,
        description="Stock ticker",
    )
    buckets: list[TimelineBucket] = Field(
        default_factory=list,
        description="Time-bucketed data",
    )
    # Ordering is the producer's responsibility; this model does not sort.
    mentions: list[StockMention] = Field(
        default_factory=list,
        description="Chronological mentions (newest first)",
    )
class PipelineHealth(BaseModel):
    """Snapshot of the Meet Kevin pipeline's health."""

    # Lets the model be validated from attribute-bearing objects as well as dicts.
    model_config = {"from_attributes": True}

    # Polling recency.
    last_poll_at: datetime | None = Field(
        default=None,
        description="Timestamp of last RSS poll",
    )
    last_poll_age_seconds: int | None = Field(
        default=None,
        description="Seconds since last poll",
    )

    # Throughput counters.
    videos_discovered_today: int = Field(
        default=0,
        description="Videos found in last 24h",
    )
    videos_captioned_today: int = Field(
        default=0,
        description="Videos with captions processed",
    )
    videos_analyzed_today: int = Field(
        default=0,
        description="Videos analyzed with LLM",
    )

    # LLM spend tracking.
    llm_cost_today_usd: float = Field(
        default=0.0,
        description="Total LLM cost today",
    )
    daily_cost_cap_usd: float = Field(
        default=5.0,
        description="Daily cost limit",
    )
    cost_capped: bool = Field(
        default=False,
        description="True if cost cap hit today",
    )

    # Overall verdict, restricted to the three literal values.
    pipeline_status: Literal["ok", "warning", "error"] = Field(
        default="ok",
        description="Overall health status",
    )
    status_message: str | None = Field(
        default=None,
        description="Optional status details",
    )