feat(scripts): ad-hoc Meet Kevin video analyzer CLI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline was canceled

Given a YouTube video ID or URL, runs the same caption-extraction +
LLM-analysis pipeline the watcher uses in prod, then prints the
extracted tickers + actions sorted by action priority and conviction
descending. No DB writes, no Redis publish — strictly observational.

Two entry points:
  - scripts/analyze_kevin_video.py — pure Python; needs yt-dlp +
    Anthropic OAuth token in env. Local laptop yt-dlp tends to hit
    "Sign in to confirm you're not a bot" rate-limits; running inside
    a cluster pod avoids this.
  - scripts/kevin-analyze.sh — wrapper that finds the running
    trading-bot-workers pod and execs the Python script in the
    meet-kevin-watcher container. Easiest invocation.

Example:
  $ ./scripts/kevin-analyze.sh poUJIZRmFew
  === Meet Kevin analysis — poUJIZRmFew ===
    Market outlook: bullish
    TICKERS (12):
      SYMBOL   ACTION    CONV HORIZON     RATIONALE
      APPF     buy      85.0% long_term   Apploven's looking fantastic ...
      SOX      buy      80.0% months      Semiconductor ETF momentum ...
      ...
This commit is contained in:
Viktor Barzin 2026-05-27 12:05:59 +00:00
parent 065b634b99
commit a93cb06898
2 changed files with 203 additions and 0 deletions

View file

@ -0,0 +1,176 @@
"""Ad-hoc analysis of a single Meet Kevin video.
Usage:
python scripts/analyze_kevin_video.py <video-id-or-url>
Pulls captions via yt-dlp, runs the same Claude prompt the watcher uses
in production, and prints the resulting tickers + actions. No DB writes,
no Redis publish — strictly observational.
Env vars (or pass via flags):
TRADING_ANTHROPIC_OAUTH_TOKEN required (matches what the pod uses)
TRADING_MEET_KEVIN_LLM_MODEL default: claude-haiku-4-5-20251001
TRADING_MEET_KEVIN_PROMPT_VERSION default: v1
"""
from __future__ import annotations
import argparse
import asyncio
import json
import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime, timezone
from decimal import Decimal
from anthropic import AsyncAnthropic
# repo root on sys.path so `services.*` and `shared.*` import cleanly
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from services.meet_kevin_watcher.caption_extractor import extract_captions
from services.meet_kevin_watcher.llm_analyzer import LlmAnalyzer
_YOUTUBE_ID = re.compile(r"^[A-Za-z0-9_-]{11}$")

# URL shapes that carry a video ID: watch?v=ID, youtu.be/ID, /shorts/ID,
# /embed/ID and /live/ID. The trailing lookahead rejects tokens longer than
# 11 characters instead of silently truncating them to a different ID.
_YOUTUBE_URL_ID = re.compile(
    r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)"
    r"([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])"
)


def parse_video_id(s: str) -> str:
    """Extract the 11-character YouTube video ID from an ID or a URL.

    Accepts a bare ID, or any string containing ``v=ID``, ``youtu.be/ID``,
    ``/shorts/ID``, ``/embed/ID`` or ``/live/ID``. Surrounding whitespace
    (for example a trailing newline from a copy-paste) is ignored.

    Args:
        s: Video ID or URL as typed by the user.

    Returns:
        The bare 11-character video ID.

    Raises:
        SystemExit: if no video ID can be found in ``s``.
    """
    candidate = s.strip()
    # fullmatch, not match: with match() the pattern's `$` would also accept
    # an ID followed by a newline and return it with the newline attached.
    if _YOUTUBE_ID.fullmatch(candidate):
        return candidate
    m = _YOUTUBE_URL_ID.search(candidate)
    if m:
        return m.group(1)
    raise SystemExit(f"could not parse YouTube video ID from: {s!r}")
def fetch_video_metadata(video_id: str) -> dict:
    """Fetch a video's metadata via ``yt-dlp --dump-json`` (nothing is downloaded).

    This is best-effort: on any failure a one-line warning is written to
    stderr and an empty dict is returned, so the caller can fall back to
    defaults instead of aborting.

    Args:
        video_id: YouTube video ID; interpolated into a watch URL.

    Returns:
        The JSON object printed by yt-dlp, or ``{}`` when yt-dlp is missing,
        exits non-zero, exceeds the 30 s timeout, or prints something that is
        not a JSON object.
    """

    def _unavailable(reason: str) -> dict:
        # Say why metadata is missing rather than failing silently.
        print(
            f"warning: no metadata for {video_id}: {reason}",
            file=sys.stderr,
        )
        return {}

    url = f"https://www.youtube.com/watch?v={video_id}"
    try:
        proc = subprocess.run(
            ["yt-dlp", "--skip-download", "--dump-json", url],
            capture_output=True,  # keep stderr so a failure can be explained
            timeout=30,
            check=True,
        )
    except FileNotFoundError:
        return _unavailable("yt-dlp executable not found")
    except subprocess.TimeoutExpired:
        return _unavailable("yt-dlp timed out after 30s")
    except subprocess.CalledProcessError as exc:
        stderr_text = (exc.stderr or b"").decode("utf-8", errors="replace")
        stderr_lines = stderr_text.strip().splitlines()
        detail = stderr_lines[-1] if stderr_lines else "no error output"
        return _unavailable(f"yt-dlp exited with status {exc.returncode}: {detail}")

    try:
        metadata = json.loads(proc.stdout.decode("utf-8"))
    except ValueError:
        # Covers both json.JSONDecodeError and UnicodeDecodeError.
        return _unavailable("yt-dlp output was not valid JSON")
    if not isinstance(metadata, dict):
        return _unavailable("yt-dlp output was not a JSON object")
    return metadata
def fmt_pct(d: Decimal | float) -> str:
    """Render a fraction as a percentage with one decimal place.

    The number is right-aligned in a field of width 5 and followed by ``%``,
    e.g. ``0.85`` becomes ``' 85.0%'``.
    """
    percent = 100 * float(d)
    return format(percent, "5.1f") + "%"
def print_analysis(video_id: str, captions, result) -> None:
    """Print a human-readable report of one video's analysis to stdout.

    The report has a header (tool name, token counts, cost, transcript
    size), the market outlook, optional macro themes and key risks, the
    summary, and finally a ticker table ordered by action priority and then
    by conviction, highest first.

    Args:
        video_id: YouTube video ID shown in the report heading.
        captions: Object exposing ``raw_text`` and ``segments``.
        result: Object exposing ``analysis``, ``raw_response``,
            ``prompt_tokens``, ``completion_tokens`` and ``cost_usd``.
    """
    analysis = result.analysis

    # Header
    print(f"\n=== Meet Kevin analysis — {video_id} ===")
    print(f" Model: {result.raw_response.get('tool_name', '?')}")
    print(
        f" Tokens: in={result.prompt_tokens} out={result.completion_tokens}"
        f" Cost: ${float(result.cost_usd):.4f}"
    )
    char_count = len(captions.raw_text)
    segment_count = len(captions.segments)
    print(f" Transcript: {char_count} chars, {segment_count} segments")

    # Outlook; the reasoning is capped at 200 characters
    print(f"\n Market outlook: {analysis.market_outlook_direction.value}")
    print(f" Reasoning: {analysis.market_outlook_reasoning[:200]}")
    if analysis.macro_themes:
        print(" Macro themes: " + ", ".join(analysis.macro_themes))
    if analysis.key_risks:
        print(" Key risks: " + ", ".join(analysis.key_risks))

    # Summary, one output line per input line
    print("\n Summary:")
    for summary_line in analysis.summary.split("\n"):
        print(f" {summary_line}")

    if not analysis.tickers:
        print("\n TICKERS: (none extracted)")
        return

    # Ticker table
    print(f"\n TICKERS ({len(analysis.tickers)}):")
    header_cells = [
        "SYMBOL".ljust(8),
        "ACTION".ljust(7),
        "CONV".rjust(6),
        "HORIZON".ljust(11),
        "RATIONALE",
    ]
    print(" " + " ".join(header_cells))
    print(" " + " ".join("-" * width for width in (8, 7, 6, 11, 60)))

    # Actionable calls first; unknown actions sink to the bottom.
    priority = {"buy": 0, "sell": 1, "hold": 2, "watch": 3, "avoid": 4}

    def sort_key(ticker):
        # Negated conviction gives descending order within one action.
        return (priority.get(ticker.action.value, 99), -float(ticker.conviction))

    for ticker in sorted(analysis.tickers, key=sort_key):
        # Keep each row on one line and cap the quote at 60 characters.
        quote = (ticker.rationale_quote or "").replace("\n", " ")[:60]
        row_cells = [
            format(ticker.symbol, "<8"),
            format(ticker.action.value, "<7"),
            format(fmt_pct(ticker.conviction), ">6"),
            format(ticker.time_horizon.value, "<11"),
            quote,
        ]
        print(" " + " ".join(row_cells))
async def main() -> None:
    """CLI entry point: analyse one video and print the report.

    Reads the video ID or URL plus the optional ``--model`` and
    ``--prompt-version`` flags (their defaults come from the
    ``TRADING_MEET_KEVIN_*`` environment variables), fetches metadata and
    captions, runs the LLM analysis and prints the result.

    Raises:
        SystemExit: if ``TRADING_ANTHROPIC_OAUTH_TOKEN`` is unset, the video
            ID cannot be parsed, or no captions are available.
    """
    default_model = os.environ.get(
        "TRADING_MEET_KEVIN_LLM_MODEL", "claude-haiku-4-5-20251001"
    )
    default_prompt_version = os.environ.get(
        "TRADING_MEET_KEVIN_PROMPT_VERSION", "v1"
    )

    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("video", help="YouTube video ID or URL")
    cli.add_argument("--model", default=default_model)
    cli.add_argument("--prompt-version", default=default_prompt_version)
    opts = cli.parse_args()

    oauth_token = os.environ.get("TRADING_ANTHROPIC_OAUTH_TOKEN")
    if not oauth_token:
        raise SystemExit("TRADING_ANTHROPIC_OAUTH_TOKEN is required")

    video_id = parse_video_id(opts.video)
    print(f"Analyzing video {video_id} with model {opts.model}...")

    # Metadata is optional; every field has a fallback.
    meta = fetch_video_metadata(video_id)
    title = meta.get("title") or video_id
    description = meta.get("description") or ""
    upload_date = meta.get("upload_date")  # YYYYMMDD
    published_at = (
        datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=timezone.utc)
        if upload_date
        else datetime.now(timezone.utc)
    )

    # Keep the scratch directory for as long as the captions object is in use.
    with tempfile.TemporaryDirectory(prefix=f"kevin-{video_id}-") as workdir:
        captions = await extract_captions(video_id, workdir)
        if captions is None or not captions.raw_text.strip():
            raise SystemExit(f"no captions available for {video_id}")

        analyzer = LlmAnalyzer(
            client=AsyncAnthropic(auth_token=oauth_token),
            model=opts.model,
            prompt_version=opts.prompt_version,
        )
        segments = [dict(segment) for segment in captions.segments]
        result = await analyzer.analyze(
            title=title,
            description=description,
            published_at=published_at,
            transcript_text=captions.raw_text,
            transcript_segments=segments,
        )
        print_analysis(video_id, captions, result)


if __name__ == "__main__":
    asyncio.run(main())

27
scripts/kevin-analyze.sh Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Ad-hoc Meet Kevin video analyzer wrapper.
#
# Usage:
#   ./scripts/kevin-analyze.sh <video-id-or-url>
#
# Picks the running meet-kevin-watcher container (which already has
# yt-dlp + ffmpeg + the Anthropic token + the right Python env) and
# runs scripts/analyze_kevin_video.py inside it.
set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "usage: $0 <youtube-video-id-or-url>" >&2
  exit 1
fi

# One pod name per line, Running pods only. Unlike '{.items[0]...}', a
# 'range' over an empty list prints nothing and exits 0, so the "no pod"
# case reaches the message below instead of tripping 'set -e' on a
# jsonpath index-out-of-bounds error.
PODS=$(kubectl -n trading-bot get pod -l app=trading-bot-workers \
  --field-selector=status.phase=Running \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')
POD=${PODS%%$'\n'*} # keep the first line only

if [[ -z "$POD" ]]; then
  echo "no running trading-bot-workers pod found" >&2
  exit 1
fi

# The first '--' ends kubectl's own options. The second ends option parsing
# in the Python CLI, so a video ID that starts with '-' is still read as the
# positional argument.
exec kubectl -n trading-bot exec "$POD" -c meet-kevin-watcher -- \
  python -m scripts.analyze_kevin_video -- "$1"