feat(scripts): ad-hoc Meet Kevin video analyzer CLI

Given a YouTube video ID or URL, runs the same caption-extraction + LLM-analysis pipeline the watcher uses in prod, then prints the extracted tickers + actions sorted by action priority, then by conviction descending. No DB writes, no Redis publish — strictly observational.

Two entry points:

- scripts/analyze_kevin_video.py — pure Python; needs yt-dlp + Anthropic OAuth token in env. Running yt-dlp on a local laptop tends to hit the "Sign in to confirm you're not a bot" rate limit; running inside a cluster pod avoids this.
- scripts/kevin-analyze.sh — wrapper that finds the running trading-bot-workers pod and execs the Python script in the meet-kevin-watcher container. Easiest invocation.

Example:

    $ ./scripts/kevin-analyze.sh poUJIZRmFew
    === Meet Kevin analysis — poUJIZRmFew ===
    Market outlook: bullish
    TICKERS (12):
    SYMBOL ACTION CONV HORIZON RATIONALE
    APPF buy 85.0% long_term Apploven's looking fantastic ...
    SOX buy 80.0% months Semiconductor ETF momentum ...
    ...
2026-05-27 12:05:59 +00:00 · 2026-05-27 12:05:59 +00:00 · a93cb06898
commit a93cb06898
parent 065b634b99
2 changed files with 203 additions and 0 deletions
--- a/scripts/analyze_kevin_video.py
+++ b/scripts/analyze_kevin_video.py
@ -0,0 +1,176 @@
+"""Ad-hoc analysis of a single Meet Kevin video.
+
+Usage:
+    python scripts/analyze_kevin_video.py <video-id-or-url>
+
+Pulls captions via yt-dlp, runs the same Claude prompt the watcher uses
+in production, and prints the resulting tickers + actions. No DB writes,
+no Redis publish — strictly observational.
+
+Env vars:
+    TRADING_ANTHROPIC_OAUTH_TOKEN     — required, env only (matches what the pod uses)
+    TRADING_MEET_KEVIN_LLM_MODEL      — default: claude-haiku-4-5-20251001; override with --model
+    TRADING_MEET_KEVIN_PROMPT_VERSION — default: v1; override with --prompt-version
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from datetime import datetime, timezone
+from decimal import Decimal
+
+from anthropic import AsyncAnthropic
+
+# repo root on sys.path so `services.*` and `shared.*` import cleanly
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from services.meet_kevin_watcher.caption_extractor import extract_captions
+from services.meet_kevin_watcher.llm_analyzer import LlmAnalyzer
+
+
+_YOUTUBE_ID = re.compile(r"^[A-Za-z0-9_-]{11}$")
+
+
+def parse_video_id(s: str) -> str:
+    """Accept a bare 11-char YouTube ID or any URL containing v=ID."""
+    if _YOUTUBE_ID.match(s):
+        return s
+    m = re.search(r"(?:v=|youtu\.be/|/shorts/)([A-Za-z0-9_-]{11})", s)
+    if m:
+        return m.group(1)
+    raise SystemExit(f"could not parse YouTube video ID from: {s!r}")
+
+
+def fetch_video_metadata(video_id: str) -> dict:
+    """Pull title/description/upload_date via yt-dlp --dump-json (no download)."""
+    url = f"https://www.youtube.com/watch?v={video_id}"
+    try:
+        out = subprocess.check_output(
+            ["yt-dlp", "--skip-download", "--dump-json", url],
+            stderr=subprocess.DEVNULL,
+            timeout=30,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+        return {}
+    try:
+        return json.loads(out.decode("utf-8"))
+    except json.JSONDecodeError:
+        return {}
+
+
+def fmt_pct(d: Decimal | float) -> str:
+    return f"{float(d) * 100:5.1f}%"
+
+
+def print_analysis(video_id: str, captions, result) -> None:
+    a = result.analysis
+    print(f"\n=== Meet Kevin analysis — {video_id} ===")
+    print(f"  Model:        {result.raw_response.get('tool_name', '?')}")
+    print(
+        f"  Tokens:       in={result.prompt_tokens} out={result.completion_tokens}"
+        f"   Cost: ${float(result.cost_usd):.4f}"
+    )
+    print(f"  Transcript:   {len(captions.raw_text)} chars, "
+          f"{len(captions.segments)} segments")
+    print(f"\n  Market outlook: {a.market_outlook_direction.value}")
+    print(f"  Reasoning:      {a.market_outlook_reasoning[:200]}")
+    if a.macro_themes:
+        print(f"  Macro themes:   {', '.join(a.macro_themes)}")
+    if a.key_risks:
+        print(f"  Key risks:      {', '.join(a.key_risks)}")
+    print(f"\n  Summary:")
+    for line in a.summary.split("\n"):
+        print(f"    {line}")
+
+    if not a.tickers:
+        print("\n  TICKERS: (none extracted)")
+        return
+
+    print(f"\n  TICKERS ({len(a.tickers)}):")
+    print(f"    {'SYMBOL':<8} {'ACTION':<7} {'CONV':>6} {'HORIZON':<11} {'RATIONALE'}")
+    print(f"    {'-'*8} {'-'*7} {'-'*6} {'-'*11} {'-'*60}")
+    # sort by action priority (buy/sell first), then conviction desc
+    action_rank = {"buy": 0, "sell": 1, "hold": 2, "watch": 3, "avoid": 4}
+    sorted_t = sorted(
+        a.tickers,
+        key=lambda t: (
+            action_rank.get(t.action.value, 99),
+            -float(t.conviction),
+        ),
+    )
+    for t in sorted_t:
+        rationale = (t.rationale_quote or "").replace("\n", " ")[:60]
+        print(
+            f"    {t.symbol:<8} "
+            f"{t.action.value:<7} "
+            f"{fmt_pct(t.conviction):>6} "
+            f"{t.time_horizon.value:<11} "
+            f"{rationale}"
+        )
+
+
+async def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("video", help="YouTube video ID or URL")
+    parser.add_argument(
+        "--model",
+        default=os.environ.get(
+            "TRADING_MEET_KEVIN_LLM_MODEL", "claude-haiku-4-5-20251001"
+        ),
+    )
+    parser.add_argument(
+        "--prompt-version",
+        default=os.environ.get("TRADING_MEET_KEVIN_PROMPT_VERSION", "v1"),
+    )
+    args = parser.parse_args()
+
+    token = os.environ.get("TRADING_ANTHROPIC_OAUTH_TOKEN")
+    if not token:
+        raise SystemExit("TRADING_ANTHROPIC_OAUTH_TOKEN is required")
+
+    video_id = parse_video_id(args.video)
+    print(f"Analyzing video {video_id} with model {args.model}...")
+
+    metadata = fetch_video_metadata(video_id)
+    title = metadata.get("title") or video_id
+    description = metadata.get("description") or ""
+    upload_date = metadata.get("upload_date")  # YYYYMMDD
+    if upload_date:
+        published_at = datetime.strptime(upload_date, "%Y%m%d").replace(
+            tzinfo=timezone.utc
+        )
+    else:
+        published_at = datetime.now(timezone.utc)
+
+    with tempfile.TemporaryDirectory(prefix=f"kevin-{video_id}-") as workdir:
+        captions = await extract_captions(video_id, workdir)
+        if captions is None or not captions.raw_text.strip():
+            raise SystemExit(f"no captions available for {video_id}")
+
+        client = AsyncAnthropic(auth_token=token)
+        analyzer = LlmAnalyzer(
+            client=client,
+            model=args.model,
+            prompt_version=args.prompt_version,
+        )
+
+        result = await analyzer.analyze(
+            title=title,
+            description=description,
+            published_at=published_at,
+            transcript_text=captions.raw_text,
+            transcript_segments=[dict(s) for s in captions.segments],
+        )
+
+    print_analysis(video_id, captions, result)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/scripts/kevin-analyze.sh
+++ b/scripts/kevin-analyze.sh
@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Ad-hoc Meet Kevin video analyzer wrapper.
+#
+# Usage:
+#   ./scripts/kevin-analyze.sh <video-id-or-url>
+#
+# Picks a Running trading-bot-workers pod and runs
+# scripts/analyze_kevin_video.py inside its meet-kevin-watcher container
+# (which already has yt-dlp + ffmpeg + the Anthropic token + the right
+# Python env).
+
+set -euo pipefail
+
+if [[ $# -ne 1 ]]; then
+    echo "usage: $0 <youtube-video-id-or-url>" >&2
+    exit 1
+fi
+
+# List all Running pods instead of indexing .items[0]: on an empty list the
+# index form makes kubectl fail, and `set -e` would then abort before the
+# friendly message below could be printed.
+PODS=$(kubectl -n trading-bot get pod -l app=trading-bot-workers \
+    --field-selector=status.phase=Running \
+    -o jsonpath='{.items[*].metadata.name}')
+# Names are space-separated; keep the first one.
+POD=${PODS%% *}
+
+if [[ -z "$POD" ]]; then
+    echo "no running trading-bot-workers pod found" >&2
+    exit 1
+fi
+
+# The second "--" ends option parsing for the Python script, so a video ID
+# that starts with "-" is not mistaken for a flag.
+exec kubectl -n trading-bot exec "$POD" -c meet-kevin-watcher -- \
+    python -m scripts.analyze_kevin_video -- "$1"