diff --git a/shared/models/__init__.py b/shared/models/__init__.py
index dda1c6a..5a3cbfe 100644
--- a/shared/models/__init__.py
+++ b/shared/models/__init__.py
@@ -16,6 +16,13 @@ from shared.models.learning import LearningAdjustment, TradeOutcome
 from shared.models.auth import User, UserCredential
 from shared.models.timeseries import MarketData, PortfolioSnapshot, StrategyMetric
 from shared.models.fundamentals import Fundamentals
+from shared.models.meet_kevin import (
+    KevinChannel,
+    KevinVideo,
+    KevinTranscript,
+    KevinAnalysis,
+    KevinStockMention,
+)
 
 __all__ = [
     "Base",
@@ -44,4 +51,10 @@ __all__ = [
     "StrategyMetric",
     # Fundamentals
     "Fundamentals",
+    # Meet Kevin
+    "KevinChannel",
+    "KevinVideo",
+    "KevinTranscript",
+    "KevinAnalysis",
+    "KevinStockMention",
 ]
diff --git a/shared/models/meet_kevin.py b/shared/models/meet_kevin.py
new file mode 100644
index 0000000..cbbc646
--- /dev/null
+++ b/shared/models/meet_kevin.py
@@ -0,0 +1,252 @@
+"""Meet Kevin YouTube channel models: Channel, Video, Transcript, Analysis, StockMention."""
+
+import enum
+from datetime import datetime
+from decimal import Decimal
+
+from sqlalchemy import (
+    BigInteger,
+    Boolean,
+    DateTime,
+    Enum as SAEnum,
+    ForeignKey,
+    Index,
+    Integer,
+    Numeric,
+    String,
+    Text,
+    func,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from shared.models.base import Base, TimestampMixin
+
+
+# ---------------------------------------------------------------------------
+# Enums
+# ---------------------------------------------------------------------------
+
+
+class KevinVideoStatus(str, enum.Enum):
+    """Video processing status in the pipeline."""
+
+    DISCOVERED = "discovered"
+    CAPTIONED = "captioned"
+    ANALYZED = "analyzed"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+class KevinTranscriptSource(str, enum.Enum):
+    """Source of transcript captions."""
+
+    CAPTIONS_MANUAL = "captions_manual"
+    CAPTIONS_AUTO = "captions_auto"
+    NONE = "none"
+
+
+class KevinMarketOutlook(str, enum.Enum):
+    """Kevin's overall market direction sentiment."""
+
+    BULLISH = "bullish"
+    NEUTRAL = "neutral"
+    BEARISH = "bearish"
+    MIXED = "mixed"
+
+
+class KevinTickerAction(str, enum.Enum):
+    """Recommended action on a ticker."""
+
+    BUY = "buy"
+    SELL = "sell"
+    HOLD = "hold"
+    WATCH = "watch"
+    AVOID = "avoid"
+
+
+class KevinTimeHorizon(str, enum.Enum):
+    """Time horizon for a recommendation."""
+
+    INTRADAY = "intraday"
+    DAYS = "days"
+    WEEKS = "weeks"
+    MONTHS = "months"
+    LONG_TERM = "long_term"
+    UNSPECIFIED = "unspecified"
+
+
+# ---------------------------------------------------------------------------
+# Models
+# ---------------------------------------------------------------------------
+
+
+class KevinChannel(TimestampMixin, Base):
+    """YouTube channel configuration and polling metadata."""
+
+    __tablename__ = "kevin_channels"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    youtube_channel_id: Mapped[str] = mapped_column(
+        String(255), unique=True, nullable=False, index=True
+    )
+    title: Mapped[str] = mapped_column(String(255), nullable=False)
+    poll_enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
+    poll_interval_seconds: Mapped[int] = mapped_column(Integer, default=10800)
+    daily_cost_cap_usd: Mapped[Decimal] = mapped_column(
+        Numeric(8, 2), default=Decimal("5.00")
+    )
+    last_polled_at: Mapped[datetime | None] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+
+    # Relationships
+    videos: Mapped[list["KevinVideo"]] = relationship(
+        back_populates="channel", cascade="all, delete-orphan"
+    )
+
+
+class KevinVideo(TimestampMixin, Base):
+    """YouTube video metadata and processing status."""
+
+    __tablename__ = "kevin_videos"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    channel_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("kevin_channels.id"), nullable=False, index=True
+    )
+    youtube_video_id: Mapped[str] = mapped_column(
+        String(255), unique=True, nullable=False, index=True
+    )
+    title: Mapped[str] = mapped_column(String(500), nullable=False)
+    description: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # index=True below already creates the single-column index on published_at.
+    published_at: Mapped[datetime | None] = mapped_column(
+        DateTime(timezone=True), nullable=True, index=True
+    )
+    duration_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    thumbnail_url: Mapped[str | None] = mapped_column(Text, nullable=True)
+    status: Mapped[KevinVideoStatus] = mapped_column(
+        SAEnum(KevinVideoStatus, name="kevin_video_status"),
+        nullable=False,
+        default=KevinVideoStatus.DISCOVERED,
+        index=True,
+    )
+    failure_reason: Mapped[str | None] = mapped_column(String(500), nullable=True)
+    retry_count: Mapped[int] = mapped_column(Integer, default=0)
+    processed_at: Mapped[datetime | None] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+
+    # Relationships
+    channel: Mapped[KevinChannel] = relationship(back_populates="videos")
+    transcript: Mapped["KevinTranscript | None"] = relationship(
+        back_populates="video", uselist=False, cascade="all, delete-orphan"
+    )
+    analyses: Mapped[list["KevinAnalysis"]] = relationship(
+        back_populates="video", cascade="all, delete-orphan"
+    )
+    mentions: Mapped[list["KevinStockMention"]] = relationship(
+        back_populates="video", cascade="all, delete-orphan"
+    )
+
+
+class KevinTranscript(Base):
+    """Extracted transcript from video captions."""
+
+    __tablename__ = "kevin_transcripts"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    video_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("kevin_videos.id"), unique=True, nullable=False
+    )
+    source: Mapped[KevinTranscriptSource] = mapped_column(
+        SAEnum(KevinTranscriptSource, name="kevin_transcript_source"), nullable=False
+    )
+    language: Mapped[str] = mapped_column(String(8), nullable=False)
+    raw_text: Mapped[str] = mapped_column(Text, nullable=False)
+    segments_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    word_count: Mapped[int] = mapped_column(Integer, nullable=False)
+    # func.now() renders as DEFAULT now(); a plain "now()" string would be
+    # emitted as a quoted SQL string literal instead of a function call.
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # Relationships
+    video: Mapped[KevinVideo] = relationship(back_populates="transcript")
+
+
+class KevinAnalysis(Base):
+    """LLM analysis result for a video."""
+
+    __tablename__ = "kevin_analyses"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    video_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("kevin_videos.id"), nullable=False, index=True
+    )
+    model: Mapped[str] = mapped_column(String(100), nullable=False)
+    prompt_version: Mapped[str] = mapped_column(String(50), nullable=False)
+    market_outlook_direction: Mapped[KevinMarketOutlook] = mapped_column(
+        SAEnum(KevinMarketOutlook, name="kevin_market_outlook"), nullable=False
+    )
+    market_outlook_reasoning: Mapped[str] = mapped_column(Text, nullable=False)
+    macro_themes_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    key_risks_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    summary: Mapped[str] = mapped_column(Text, nullable=False)
+    raw_response_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    prompt_tokens: Mapped[int] = mapped_column(Integer, nullable=False)
+    completion_tokens: Mapped[int] = mapped_column(Integer, nullable=False)
+    cost_usd: Mapped[Decimal] = mapped_column(Numeric(10, 4), nullable=False)
+    # Server-side insert timestamp; func.now() so the DDL calls now() per row.
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # Relationships
+    video: Mapped[KevinVideo] = relationship(back_populates="analyses")
+    mentions: Mapped[list["KevinStockMention"]] = relationship(
+        back_populates="analysis", cascade="all, delete-orphan"
+    )
+
+
+class KevinStockMention(Base):
+    """Per-ticker recommendation extracted from analysis."""
+
+    __tablename__ = "kevin_stock_mentions"
+    # Must be a class attribute: SQLAlchemy ignores a module-level
+    # __table_args__, so the composite index would never be created.
+    __table_args__ = (
+        Index("ix_kevin_stock_mentions_symbol_created", "symbol", "created_at"),
+    )
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    video_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("kevin_videos.id"), nullable=False, index=True
+    )
+    analysis_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey("kevin_analyses.id"), nullable=False
+    )
+    symbol: Mapped[str] = mapped_column(
+        String(16), nullable=False, index=True
+    )
+    action: Mapped[KevinTickerAction] = mapped_column(
+        SAEnum(KevinTickerAction, name="kevin_ticker_action"), nullable=False
+    )
+    conviction: Mapped[Decimal] = mapped_column(Numeric(4, 3), nullable=False)
+    time_horizon: Mapped[KevinTimeHorizon] = mapped_column(
+        SAEnum(KevinTimeHorizon, name="kevin_time_horizon"), nullable=False
+    )
+    rationale_quote: Mapped[str] = mapped_column(Text, nullable=False)
+    video_timestamp_seconds: Mapped[int | None] = mapped_column(
+        Integer, nullable=True
+    )
+    # Server-side insert timestamp; func.now() so the DDL calls now() per row.
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    # Relationships
+    video: Mapped[KevinVideo] = relationship(back_populates="mentions")
+    analysis: Mapped[KevinAnalysis] = relationship(back_populates="mentions")
diff --git a/tests/test_models.py b/tests/test_models.py
index 97e8fb4..bac7b41 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -351,6 +351,11 @@ class TestMetadata:
             "portfolio_snapshots",
             "strategy_metrics",
             "fundamentals",
+            "kevin_channels",
+            "kevin_videos",
+            "kevin_transcripts",
+            "kevin_analyses",
+            "kevin_stock_mentions",
         }
         assert expected.issubset(table_names)
 
@@ -360,6 +365,41 @@ class TestMetadata:
         assert "updated_at" in Strategy.__table__.columns
 
 
+class TestMeetKevinModels:
+    def test_meet_kevin_models_importable(self) -> None:
+        """Test that Meet Kevin models are importable and have correct table names and enums."""
+        from shared.models import (
+            KevinChannel,
+            KevinVideo,
+            KevinTranscript,
+            KevinAnalysis,
+            KevinStockMention,
+        )
+
+        # Check table names
+        assert KevinChannel.__tablename__ == "kevin_channels"
+        assert KevinVideo.__tablename__ == "kevin_videos"
+        assert KevinTranscript.__tablename__ == "kevin_transcripts"
+        assert KevinAnalysis.__tablename__ == "kevin_analyses"
+        assert KevinStockMention.__tablename__ == "kevin_stock_mentions"
+
+        # Check that KevinVideo.status is an Enum containing "DISCOVERED"
+        status_col = KevinVideo.__table__.c.status
+        assert status_col is not None
+        assert "DISCOVERED" in status_col.type.enums
+
+        # Check relationships exist
+        assert hasattr(KevinChannel, "videos")
+        assert hasattr(KevinVideo, "channel")
+        assert hasattr(KevinVideo, "transcript")
+        assert hasattr(KevinVideo, "analyses")
+        assert hasattr(KevinVideo, "mentions")
+
+        # Check the composite index is registered on the table
+        index_names = {ix.name for ix in KevinStockMention.__table__.indexes}
+        assert "ix_kevin_stock_mentions_symbol_created" in index_names
+
+
 class TestDbFactory:
     def test_create_db_returns_engine_and_session(self) -> None:
         config = BaseConfig()