From a93cb06898073322f9790ab0e4f9e76273be6be7 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Wed, 27 May 2026 12:05:59 +0000
Subject: [PATCH] feat(scripts): ad-hoc Meet Kevin video analyzer CLI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Given a YouTube video ID or URL, runs the same caption-extraction +
LLM-analysis pipeline the watcher uses in prod, then prints the
extracted tickers + actions sorted by action priority and conviction
descending. No DB writes, no Redis publish — strictly observational.

Two entry points:

- scripts/analyze_kevin_video.py — pure Python; needs yt-dlp +
  Anthropic OAuth token in env. Local laptop yt-dlp tends to hit
  "Sign in to confirm you're not a bot" rate-limits; running inside
  a cluster pod avoids this.

- scripts/kevin-analyze.sh — wrapper that finds the running
  trading-bot-workers pod and execs the Python script in the
  meet-kevin-watcher container. Easiest invocation.

Example:

    $ ./scripts/kevin-analyze.sh poUJIZRmFew
    === Meet Kevin analysis — poUJIZRmFew ===
    Market outlook: bullish
    TICKERS (12):
    SYMBOL ACTION CONV HORIZON RATIONALE
    APPF buy 85.0% long_term Apploven's looking fantastic ...
    SOX buy 80.0% months Semiconductor ETF momentum ...
    ...
---
 scripts/analyze_kevin_video.py | 233 +++++++++++++++++++++++++++++++++
 scripts/kevin-analyze.sh       |  32 +++++
 2 files changed, 265 insertions(+)
 create mode 100644 scripts/analyze_kevin_video.py
 create mode 100755 scripts/kevin-analyze.sh

diff --git a/scripts/analyze_kevin_video.py b/scripts/analyze_kevin_video.py
new file mode 100644
index 0000000..9c8afc7
--- /dev/null
+++ b/scripts/analyze_kevin_video.py
@@ -0,0 +1,233 @@
+"""Ad-hoc analysis of a single Meet Kevin video.
+
+Usage:
+    python scripts/analyze_kevin_video.py <video-id-or-url>
+
+Pulls captions via yt-dlp, runs the same Claude prompt the watcher uses
+in production, and prints the resulting tickers + actions. No DB writes,
+no Redis publish — strictly observational.
+
+Env vars (model and prompt version may also be passed via flags):
+    TRADING_ANTHROPIC_OAUTH_TOKEN — required (matches what the pod uses)
+    TRADING_MEET_KEVIN_LLM_MODEL — default: claude-haiku-4-5-20251001
+    TRADING_MEET_KEVIN_PROMPT_VERSION — default: v1
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from datetime import datetime, timezone
+from decimal import Decimal
+
+from anthropic import AsyncAnthropic
+
+# repo root on sys.path so `services.*` and `shared.*` import cleanly
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from services.meet_kevin_watcher.caption_extractor import extract_captions
+from services.meet_kevin_watcher.llm_analyzer import LlmAnalyzer
+
+
+_YOUTUBE_ID = re.compile(r"^[A-Za-z0-9_-]{11}$")
+_URL_VIDEO_ID = re.compile(
+    r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]{11})"
+)
+
+
+def parse_video_id(s: str) -> str:
+    """Return the 11-character YouTube video ID found in ``s``.
+
+    Accepts a bare ID, or a URL in ``v=ID``, ``youtu.be/ID``, ``/shorts/ID``,
+    ``/embed/ID`` or ``/live/ID`` form. Surrounding whitespace is ignored.
+
+    Raises:
+        SystemExit: if no video ID can be found in ``s``.
+    """
+    candidate = s.strip()
+    # fullmatch: the whole argument must be the ID, not just a prefix of it
+    if _YOUTUBE_ID.fullmatch(candidate):
+        return candidate
+    m = _URL_VIDEO_ID.search(candidate)
+    if m:
+        return m.group(1)
+    raise SystemExit(f"could not parse YouTube video ID from: {s!r}")
+
+
+def fetch_video_metadata(video_id: str) -> dict:
+    """Fetch title/description/upload_date via ``yt-dlp --dump-json``.
+
+    Nothing is downloaded. This is best-effort: if yt-dlp is missing, exits
+    non-zero, times out, or prints something that is not a JSON object, a
+    warning goes to stderr and ``{}`` is returned so the caller can fall
+    back to defaults.
+    """
+    url = f"https://www.youtube.com/watch?v={video_id}"
+    try:
+        out = subprocess.check_output(
+            ["yt-dlp", "--skip-download", "--dump-json", url],
+            stderr=subprocess.DEVNULL,
+            timeout=30,
+        )
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError
+        metadata = json.loads(out.decode("utf-8"))
+    except (
+        subprocess.CalledProcessError,
+        FileNotFoundError,
+        subprocess.TimeoutExpired,
+        ValueError,
+    ) as exc:
+        print(f"warning: could not fetch video metadata: {exc}", file=sys.stderr)
+        return {}
+    return metadata if isinstance(metadata, dict) else {}
+
+
+def fmt_pct(d: Decimal | float) -> str:
+    """Format a 0-1 fraction as a percentage with one decimal place."""
+    return f"{float(d) * 100:5.1f}%"
+
+
+def print_analysis(video_id: str, captions, result) -> None:
+    """Print a human-readable report of one analysis run to stdout.
+
+    ``captions`` must expose ``raw_text`` and ``segments``; ``result`` must
+    expose ``analysis``, ``raw_response``, ``prompt_tokens``,
+    ``completion_tokens`` and ``cost_usd``. Tickers are listed by action
+    priority (buy, sell, hold, watch, avoid, then anything else) and, within
+    an action, by conviction from highest to lowest.
+    """
+    a = result.analysis
+    print(f"\n=== Meet Kevin analysis — {video_id} ===")
+    # NOTE(review): the label says "Model" but the value is the tool name
+    # from the raw response — confirm which one is intended.
+    print(f" Model: {result.raw_response.get('tool_name', '?')}")
+    print(
+        f" Tokens: in={result.prompt_tokens} out={result.completion_tokens}"
+        f" Cost: ${float(result.cost_usd):.4f}"
+    )
+    print(
+        f" Transcript: {len(captions.raw_text)} chars, "
+        f"{len(captions.segments)} segments"
+    )
+    print(f"\n Market outlook: {a.market_outlook_direction.value}")
+    print(f" Reasoning: {a.market_outlook_reasoning[:200]}")
+    if a.macro_themes:
+        print(f" Macro themes: {', '.join(a.macro_themes)}")
+    if a.key_risks:
+        print(f" Key risks: {', '.join(a.key_risks)}")
+    print("\n Summary:")
+    for line in a.summary.split("\n"):
+        print(f" {line}")
+
+    if not a.tickers:
+        print("\n TICKERS: (none extracted)")
+        return
+
+    print(f"\n TICKERS ({len(a.tickers)}):")
+    print(f" {'SYMBOL':<8} {'ACTION':<7} {'CONV':>6} {'HORIZON':<11} {'RATIONALE'}")
+    print(f" {'-'*8} {'-'*7} {'-'*6} {'-'*11} {'-'*60}")
+    # sort by action priority (buy/sell first), then conviction desc
+    action_rank = {"buy": 0, "sell": 1, "hold": 2, "watch": 3, "avoid": 4}
+    sorted_t = sorted(
+        a.tickers,
+        key=lambda t: (
+            action_rank.get(t.action.value, 99),
+            -float(t.conviction),
+        ),
+    )
+    for t in sorted_t:
+        rationale = (t.rationale_quote or "").replace("\n", " ")[:60]
+        print(
+            f" {t.symbol:<8} "
+            f"{t.action.value:<7} "
+            f"{fmt_pct(t.conviction):>6} "
+            f"{t.time_horizon.value:<11} "
+            f"{rationale}"
+        )
+
+
+def _parse_upload_date(upload_date: str | None) -> datetime:
+    """Convert yt-dlp's ``YYYYMMDD`` upload date to a UTC datetime.
+
+    A missing or malformed date falls back to the current UTC time, so a
+    metadata problem never aborts the analysis.
+    """
+    if upload_date:
+        try:
+            parsed = datetime.strptime(upload_date, "%Y%m%d")
+        except (TypeError, ValueError):
+            pass  # malformed date: use the fallback below
+        else:
+            return parsed.replace(tzinfo=timezone.utc)
+    return datetime.now(timezone.utc)
+
+
+async def main() -> None:
+    """Parse CLI arguments, analyze one video, and print the report.
+
+    Raises:
+        SystemExit: if the OAuth token is missing, the video ID cannot be
+            parsed, or no captions are available for the video.
+    """
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        # keep the module docstring's line breaks in --help output
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument("video", help="YouTube video ID or URL")
+    parser.add_argument(
+        "--model",
+        default=os.environ.get(
+            "TRADING_MEET_KEVIN_LLM_MODEL", "claude-haiku-4-5-20251001"
+        ),
+    )
+    parser.add_argument(
+        "--prompt-version",
+        default=os.environ.get("TRADING_MEET_KEVIN_PROMPT_VERSION", "v1"),
+    )
+    args = parser.parse_args()
+
+    token = os.environ.get("TRADING_ANTHROPIC_OAUTH_TOKEN")
+    if not token:
+        raise SystemExit("TRADING_ANTHROPIC_OAUTH_TOKEN is required")
+
+    video_id = parse_video_id(args.video)
+    print(f"Analyzing video {video_id} with model {args.model}...")
+
+    metadata = fetch_video_metadata(video_id)
+    title = metadata.get("title") or video_id
+    description = metadata.get("description") or ""
+    # yt-dlp reports upload_date as YYYYMMDD
+    published_at = _parse_upload_date(metadata.get("upload_date"))
+
+    with tempfile.TemporaryDirectory(prefix=f"kevin-{video_id}-") as workdir:
+        captions = await extract_captions(video_id, workdir)
+        if captions is None or not captions.raw_text.strip():
+            raise SystemExit(f"no captions available for {video_id}")
+
+        client = AsyncAnthropic(auth_token=token)
+        analyzer = LlmAnalyzer(
+            client=client,
+            model=args.model,
+            prompt_version=args.prompt_version,
+        )
+
+        result = await analyzer.analyze(
+            title=title,
+            description=description,
+            published_at=published_at,
+            transcript_text=captions.raw_text,
+            transcript_segments=[dict(s) for s in captions.segments],
+        )
+
+        print_analysis(video_id, captions, result)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scripts/kevin-analyze.sh b/scripts/kevin-analyze.sh
new file mode 100755
index 0000000..328a42c
--- /dev/null
+++ b/scripts/kevin-analyze.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Ad-hoc Meet Kevin video analyzer wrapper.
+#
+# Usage:
+#   ./scripts/kevin-analyze.sh <video-id-or-url>
+#
+# Picks the running meet-kevin-watcher container (which already has
+# yt-dlp + ffmpeg + the Anthropic token + the right Python env) and
+# runs scripts/analyze_kevin_video.py inside it.
+
+set -euo pipefail
+
+if [[ $# -ne 1 ]]; then
+  echo "usage: $0 <video-id-or-url>" >&2
+  exit 1
+fi
+
+# List every matching Running pod instead of indexing `.items[0]`: indexing
+# an empty list makes kubectl exit non-zero, and under `set -e` that would
+# abort the script before the friendly error below could be printed.
+PODS=$(kubectl -n trading-bot get pod -l app=trading-bot-workers \
+  --field-selector=status.phase=Running \
+  -o jsonpath='{.items[*].metadata.name}')
+POD=${PODS%% *}
+
+if [[ -z "$POD" ]]; then
+  echo "no running trading-bot-workers pod found" >&2
+  exit 1
+fi
+
+exec kubectl -n trading-bot exec "$POD" -c meet-kevin-watcher -- \
+  python -m scripts.analyze_kevin_video "$1"