2026-05-22 19:24:40 +00:00
|
|
|
|
"""Anthropic SDK LLM analyzer for Meet Kevin video transcripts.
|
2026-05-21 19:44:57 +00:00
|
|
|
|
|
2026-05-22 19:24:40 +00:00
|
|
|
|
Calls Claude Sonnet (via native Anthropic SDK with OAuth bearer token) with
|
|
|
|
|
|
tool-use forcing to extract structured MeetKevinAnalysis from a video transcript.
|
2026-05-21 19:44:57 +00:00
|
|
|
|
|
|
|
|
|
|
Public API:
|
|
|
|
|
|
SYSTEM_PROMPT — module-level analyst instructions
|
|
|
|
|
|
compute_cost_usd() — Decimal-precise cost from token counts
|
|
|
|
|
|
LlmCallResult — frozen dataclass returned by analyze()
|
|
|
|
|
|
LlmAnalyzer — async class; .analyze() does the API call
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
from decimal import Decimal
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
2026-05-22 19:24:40 +00:00
|
|
|
|
from anthropic import AsyncAnthropic
|
2026-05-21 19:44:57 +00:00
|
|
|
|
|
|
|
|
|
|
from shared.schemas.meet_kevin import MeetKevinAnalysis
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Pricing table (USD per 1 000 000 tokens: input, output)
|
2026-05-22 19:24:40 +00:00
|
|
|
|
# Native Anthropic list pricing. With OAuth/Enterprise tokens real billing
|
|
|
|
|
|
# is via subscription quota, but we still compute notional USD for the
|
|
|
|
|
|
# daily-cap accounting logic.
|
2026-05-21 19:44:57 +00:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
_PRICING: dict[str, tuple[Decimal, Decimal]] = {
|
2026-05-22 19:24:40 +00:00
|
|
|
|
"claude-sonnet-4-5": (Decimal("3"), Decimal("15")),
|
|
|
|
|
|
"claude-sonnet-4-6": (Decimal("3"), Decimal("15")),
|
2026-05-21 19:44:57 +00:00
|
|
|
|
"claude-opus-4-7": (Decimal("15"), Decimal("75")),
|
|
|
|
|
|
"claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# System prompt
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
SYSTEM_PROMPT = """
|
|
|
|
|
|
You are a professional financial analyst specialising in retail investor sentiment.
|
|
|
|
|
|
Your task is to read the full transcript of a Meet Kevin (Kevin Paffrath) YouTube
|
|
|
|
|
|
video and extract a structured investment analysis from it.
|
|
|
|
|
|
|
|
|
|
|
|
## Your mission
|
|
|
|
|
|
|
|
|
|
|
|
Read the transcript carefully and produce a single, precise call to the
|
|
|
|
|
|
`submit_analysis` tool. Do **not** respond with prose — your entire output must be
|
|
|
|
|
|
that one tool call with all required fields filled in correctly.
|
|
|
|
|
|
|
|
|
|
|
|
## What to extract
|
|
|
|
|
|
|
|
|
|
|
|
### Market outlook
|
|
|
|
|
|
Identify the overall market direction Kevin is expressing: bullish, bearish, neutral,
|
|
|
|
|
|
or mixed. Write a concise `market_outlook_reasoning` (2–4 sentences) that explains
|
|
|
|
|
|
*why* you assigned that direction, grounded in specific statements from the video.
|
|
|
|
|
|
|
|
|
|
|
|
### Macro themes
|
|
|
|
|
|
List the 2–6 highest-level economic or policy themes Kevin discusses (e.g.
|
|
|
|
|
|
"Federal Reserve rate path", "AI capex cycle", "commercial real estate stress",
|
|
|
|
|
|
"dollar strength", "energy transition"). These should be phrase-length labels, not
|
|
|
|
|
|
full sentences.
|
|
|
|
|
|
|
|
|
|
|
|
### Key risks
|
|
|
|
|
|
List the 2–5 principal downside risks Kevin flags. Again, short phrase labels, not
|
|
|
|
|
|
paragraphs. Only include risks Kevin explicitly names or clearly implies — do not
|
|
|
|
|
|
invent risks he did not discuss.
|
|
|
|
|
|
|
|
|
|
|
|
### Summary
|
|
|
|
|
|
Write a ~200-word plain-English summary of the video's investment thesis. Focus on
|
|
|
|
|
|
actionable takeaways and any specific catalysts Kevin mentions. Avoid filler phrases
|
|
|
|
|
|
like "In this video Kevin discusses…" — start directly with the insight.
|
|
|
|
|
|
|
|
|
|
|
|
### Per-ticker mentions (tickers field)
|
|
|
|
|
|
Extract every stock, ETF, or crypto ticker that Kevin makes a substantive statement
|
2026-05-28 21:40:07 +00:00
|
|
|
|
about. **All fields below must be FORWARD-LOOKING — they describe where Kevin
|
|
|
|
|
|
expects the stock to go over the stated time_horizon, NOT how it has moved
|
|
|
|
|
|
recently.** Kevin often reacts to past drops by capitulating ("I sold because it
|
|
|
|
|
|
dumped 20%") — that is a backward-looking reactive trade and is NOT what we want
|
|
|
|
|
|
to capture. Only emit an action when Kevin expresses a directional view about
|
|
|
|
|
|
the FUTURE.
|
|
|
|
|
|
|
|
|
|
|
|
For each ticker, fill in the following:
|
2026-05-21 19:44:57 +00:00
|
|
|
|
|
|
|
|
|
|
- **symbol** — The uppercase ticker symbol (e.g. "NVDA", "SPY", "BTC"). If Kevin
|
|
|
|
|
|
mentions the company name but not the ticker, infer the ticker from the name (e.g.
|
|
|
|
|
|
"Nvidia" → "NVDA"). Max 6 characters. Only include tickers you are confident about.
|
|
|
|
|
|
|
2026-05-28 21:40:07 +00:00
|
|
|
|
- **action** — The forward-looking action that would profit from Kevin's predicted
|
|
|
|
|
|
next move. Use exactly one of: `buy`, `sell`, `hold`, `watch`, `avoid`.
|
|
|
|
|
|
- `buy` — Kevin expresses a forward bullish view (he thinks it goes UP from
|
|
|
|
|
|
here). Examples: "I'm loading up", "this is going to $X", "I think this
|
|
|
|
|
|
bottoms here and rips".
|
|
|
|
|
|
- `sell` — Kevin expresses a forward bearish view (he thinks it goes DOWN
|
|
|
|
|
|
from here). Examples: "I'm getting out before earnings", "this is going to
|
|
|
|
|
|
crash", "fair value is way below this".
|
|
|
|
|
|
- `hold` — Kevin already owns and expects sideways/no-strong-direction.
|
|
|
|
|
|
- `watch` — Kevin is interested but waiting for a setup. Used when his view is
|
|
|
|
|
|
"I want to buy IF X happens" — not enough conviction yet.
|
|
|
|
|
|
- `avoid` — Forward-looking "don't touch this at any price" — Kevin thinks the
|
|
|
|
|
|
forward risk/reward is bad.
|
|
|
|
|
|
- **Critical filter: if Kevin says "I sold" purely because the stock already
|
|
|
|
|
|
dropped (capitulation, profit-taking, stop-loss tripped) and offers no
|
|
|
|
|
|
forward view on where it goes from here, use `watch` instead.** A
|
|
|
|
|
|
backward-looking reactive sell is NOT a forward-looking `sell` signal.
|
|
|
|
|
|
- If unsure, skip the ticker rather than defaulting to `hold`.
|
|
|
|
|
|
|
|
|
|
|
|
- **expected_move** — The forward-looking directional view distilled into a single
|
|
|
|
|
|
bucket. Independent of `action`. Use one of: `up_strong` (>= +5% over the
|
|
|
|
|
|
horizon), `up_mild` (+1% to +5%), `sideways` (-1% to +1%), `down_mild` (-5% to
|
|
|
|
|
|
-1%), `down_strong` (<= -5%), `unknown` (Kevin made no forward-looking call).
|
|
|
|
|
|
This is the field the trading bot weights most heavily, so be conservative —
|
|
|
|
|
|
use `unknown` when Kevin is reacting to the past instead of predicting the
|
|
|
|
|
|
future.
|
2026-05-21 19:44:57 +00:00
|
|
|
|
|
|
|
|
|
|
- **conviction** — A float between 0.0 and 1.0 representing how confident Kevin
|
2026-05-28 21:40:07 +00:00
|
|
|
|
sounds **about the forward move** (NOT how loudly he is talking about the past).
|
|
|
|
|
|
Use 0.8–1.0 for "I'm buying this aggressively because I expect it to rip",
|
2026-05-21 19:44:57 +00:00
|
|
|
|
0.5–0.7 for a clear directional view with some hedging, 0.2–0.4 for a tentative
|
|
|
|
|
|
or heavily-caveated take. A ticker Kevin mentions only in passing (< 20 words of
|
|
|
|
|
|
commentary) should be **skipped entirely** rather than assigned low conviction.
|
|
|
|
|
|
|
|
|
|
|
|
- **time_horizon** — Pick the closest match from: `intraday`, `days`, `weeks`,
|
|
|
|
|
|
`months`, `long_term`, `unspecified`. If Kevin does not say, use `unspecified`.
|
|
|
|
|
|
|
|
|
|
|
|
- **rationale_quote** — A short verbatim or lightly paraphrased quote (20–80 words)
|
2026-05-28 21:40:07 +00:00
|
|
|
|
from the transcript that best justifies the action you assigned. The quote must
|
|
|
|
|
|
contain Kevin's FORWARD-LOOKING reasoning — if you can only find a backward
|
|
|
|
|
|
statement ("it dropped 20%"), the ticker doesn't belong in this output.
|
2026-05-21 19:44:57 +00:00
|
|
|
|
|
|
|
|
|
|
- **video_timestamp_seconds** — If the transcript includes segment timestamps (lines
|
|
|
|
|
|
formatted as `[<N>s] <text>`), set this to the integer second where Kevin first
|
|
|
|
|
|
makes the substantive statement about this ticker. If no timestamps are available,
|
|
|
|
|
|
set to null.
|
|
|
|
|
|
|
|
|
|
|
|
## Rules for ticker inclusion
|
|
|
|
|
|
|
|
|
|
|
|
1. **Skip tickers mentioned only in passing.** Kevin often references tickers as
|
|
|
|
|
|
examples or comparisons without making any recommendation. If he says fewer than
|
|
|
|
|
|
~20 words about a ticker with no clear directional signal, omit it from `tickers`.
|
|
|
|
|
|
|
|
|
|
|
|
2. **Do not duplicate tickers.** If Kevin mentions the same ticker multiple times,
|
|
|
|
|
|
merge the signals into a single entry that represents his overall view from the
|
|
|
|
|
|
video. Use the timestamp of the *first* substantive mention.
|
|
|
|
|
|
|
|
|
|
|
|
3. **Symbols only, no company names.** The `symbol` field must be a ticker, not a
|
|
|
|
|
|
company name. "Nvidia" is wrong; "NVDA" is correct.
|
|
|
|
|
|
|
|
|
|
|
|
4. **Conviction scores are comparative.** Calibrate them relative to each other
|
|
|
|
|
|
within the video — Kevin's "top conviction" pick in a video might be 0.85, while
|
|
|
|
|
|
a hedged mention is 0.45.
|
|
|
|
|
|
|
|
|
|
|
|
## Quality checklist (review before calling submit_analysis)
|
|
|
|
|
|
|
|
|
|
|
|
- [ ] `market_outlook_direction` is one of: bullish, neutral, bearish, mixed
|
|
|
|
|
|
- [ ] `macro_themes` has 2–6 items, each a concise phrase
|
|
|
|
|
|
- [ ] `key_risks` has 2–5 items, each a concise phrase
|
|
|
|
|
|
- [ ] `summary` is approximately 200 words
|
2026-05-28 21:40:07 +00:00
|
|
|
|
- [ ] Every ticker in `tickers` has a clear FORWARD-LOOKING signal (no "I sold
|
|
|
|
|
|
because it dropped" without a forward view)
|
|
|
|
|
|
- [ ] Each ticker's `expected_move` matches its `action` (e.g. `buy` should pair
|
|
|
|
|
|
with `up_strong` or `up_mild`; `sell` with `down_strong` or `down_mild`)
|
|
|
|
|
|
- [ ] Tickers mentioned only in passing or only reactively are omitted
|
|
|
|
|
|
- [ ] `conviction` reflects confidence in the forward move, not volume of past
|
|
|
|
|
|
commentary
|
2026-05-21 19:44:57 +00:00
|
|
|
|
- [ ] `time_horizon` is one of the six allowed values
|
2026-05-28 21:40:07 +00:00
|
|
|
|
- [ ] `rationale_quote` contains Kevin's forward-looking reasoning, not just a
|
|
|
|
|
|
backward observation
|
2026-05-21 19:44:57 +00:00
|
|
|
|
- [ ] You are calling `submit_analysis` exactly once with all required fields
|
|
|
|
|
|
|
|
|
|
|
|
Now read the transcript provided in the user message and call `submit_analysis`.
|
|
|
|
|
|
""".strip()
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
2026-05-22 19:24:40 +00:00
|
|
|
|
# Tool definition (Anthropic tool-use format)
|
2026-05-21 19:44:57 +00:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
2026-05-22 19:24:40 +00:00
|
|
|
|
_ANALYSIS_TOOL: dict[str, Any] = {
|
|
|
|
|
|
"name": "submit_analysis",
|
|
|
|
|
|
"description": (
|
|
|
|
|
|
"Submit the structured analysis of one Meet Kevin video. Call this exactly once."
|
|
|
|
|
|
),
|
|
|
|
|
|
"input_schema": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": [
|
|
|
|
|
|
"market_outlook_direction",
|
|
|
|
|
|
"market_outlook_reasoning",
|
|
|
|
|
|
"macro_themes",
|
|
|
|
|
|
"key_risks",
|
|
|
|
|
|
"summary",
|
|
|
|
|
|
"tickers",
|
|
|
|
|
|
],
|
|
|
|
|
|
"properties": {
|
|
|
|
|
|
"market_outlook_direction": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"enum": ["bullish", "neutral", "bearish", "mixed"],
|
|
|
|
|
|
"description": "Overall market sentiment direction",
|
|
|
|
|
|
},
|
|
|
|
|
|
"market_outlook_reasoning": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"description": "2-4 sentence explanation of the market outlook direction",
|
|
|
|
|
|
},
|
|
|
|
|
|
"macro_themes": {
|
|
|
|
|
|
"type": "array",
|
|
|
|
|
|
"items": {"type": "string"},
|
|
|
|
|
|
"description": "2-6 high-level macro economic themes discussed",
|
|
|
|
|
|
},
|
|
|
|
|
|
"key_risks": {
|
|
|
|
|
|
"type": "array",
|
|
|
|
|
|
"items": {"type": "string"},
|
|
|
|
|
|
"description": "2-5 principal downside risks Kevin mentions",
|
|
|
|
|
|
},
|
|
|
|
|
|
"summary": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"description": "~200-word plain-English investment thesis summary",
|
|
|
|
|
|
},
|
|
|
|
|
|
"tickers": {
|
|
|
|
|
|
"type": "array",
|
|
|
|
|
|
"description": "Per-ticker mentions with action and conviction",
|
|
|
|
|
|
"items": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": [
|
|
|
|
|
|
"symbol",
|
|
|
|
|
|
"action",
|
|
|
|
|
|
"conviction",
|
|
|
|
|
|
"time_horizon",
|
|
|
|
|
|
"rationale_quote",
|
|
|
|
|
|
"video_timestamp_seconds",
|
2026-05-28 21:40:07 +00:00
|
|
|
|
"expected_move",
|
2026-05-22 19:24:40 +00:00
|
|
|
|
],
|
|
|
|
|
|
"properties": {
|
|
|
|
|
|
"symbol": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"description": "Uppercase ticker symbol (1-6 chars)",
|
|
|
|
|
|
},
|
|
|
|
|
|
"action": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"enum": ["buy", "sell", "hold", "watch", "avoid"],
|
|
|
|
|
|
"description": "Recommendation action",
|
|
|
|
|
|
},
|
|
|
|
|
|
"conviction": {
|
|
|
|
|
|
"type": "number",
|
|
|
|
|
|
"minimum": 0.0,
|
|
|
|
|
|
"maximum": 1.0,
|
|
|
|
|
|
"description": "Confidence in recommendation (0.0-1.0)",
|
|
|
|
|
|
},
|
|
|
|
|
|
"time_horizon": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"enum": [
|
|
|
|
|
|
"intraday",
|
|
|
|
|
|
"days",
|
|
|
|
|
|
"weeks",
|
|
|
|
|
|
"months",
|
|
|
|
|
|
"long_term",
|
|
|
|
|
|
"unspecified",
|
|
|
|
|
|
],
|
|
|
|
|
|
"description": "Time horizon for the recommendation",
|
|
|
|
|
|
},
|
|
|
|
|
|
"rationale_quote": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"description": "Short verbatim or paraphrased quote from video",
|
|
|
|
|
|
},
|
|
|
|
|
|
"video_timestamp_seconds": {
|
|
|
|
|
|
"type": ["integer", "null"],
|
|
|
|
|
|
"description": "Timestamp in seconds for deep-link target",
|
2026-05-21 19:44:57 +00:00
|
|
|
|
},
|
2026-05-28 21:40:07 +00:00
|
|
|
|
"expected_move": {
|
|
|
|
|
|
"type": "string",
|
|
|
|
|
|
"enum": [
|
|
|
|
|
|
"up_strong",
|
|
|
|
|
|
"up_mild",
|
|
|
|
|
|
"sideways",
|
|
|
|
|
|
"down_mild",
|
|
|
|
|
|
"down_strong",
|
|
|
|
|
|
"unknown",
|
|
|
|
|
|
],
|
|
|
|
|
|
"description": (
|
|
|
|
|
|
"Forward-looking directional view over time_horizon. "
|
|
|
|
|
|
"up_strong >= +5%, up_mild +1-5%, sideways -1 to +1%, "
|
|
|
|
|
|
"down_mild -5 to -1%, down_strong <= -5%. "
|
|
|
|
|
|
"Use 'unknown' if Kevin makes no directional call."
|
|
|
|
|
|
),
|
|
|
|
|
|
},
|
2026-05-21 19:44:57 +00:00
|
|
|
|
},
|
|
|
|
|
|
},
|
|
|
|
|
|
},
|
|
|
|
|
|
},
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Public helpers
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _strip_snapshot_date(model: str) -> str:
    """Return *model* without a trailing ``-YYYYMMDD`` snapshot suffix, if present."""
    base, sep, suffix = model.rpartition("-")
    if sep and base and len(suffix) == 8 and suffix.isascii() and suffix.isdigit():
        return base
    return model


def compute_cost_usd(model: str, input_tokens: int, output_tokens: int) -> Decimal:
    """Compute LLM call cost in USD using pinned per-model pricing.

    The pricing table is consulted by exact model identifier first. If that
    misses, the identifier and the table keys are compared with any trailing
    ``-YYYYMMDD`` snapshot date removed, so an alias and a dated snapshot of
    the same model (e.g. ``claude-sonnet-4-5`` and
    ``claude-sonnet-4-5-20250929``) resolve to the same price instead of
    silently costing zero.

    Args:
        model: Model identifier string (a key in _PRICING, or the alias /
            dated-snapshot form of one).
        input_tokens: Number of input/prompt tokens consumed.
        output_tokens: Number of output/completion tokens generated.

    Returns:
        Cost as a Decimal quantized to four decimal places. Returns
        Decimal("0") for unknown models (logs warning).
    """
    pricing = _PRICING.get(model)
    if pricing is None and isinstance(model, str):
        # Only a date suffix is stripped; no looser prefix matching is done,
        # so different model versions are never confused with one another.
        family = _strip_snapshot_date(model)
        pricing = next(
            (
                rates
                for known_model, rates in _PRICING.items()
                if _strip_snapshot_date(known_model) == family
            ),
            None,
        )

    if pricing is None:
        logger.warning("compute_cost_usd: unknown model %r — returning zero cost", model)
        return Decimal("0")

    price_per_m_input, price_per_m_output = pricing
    million = Decimal("1000000")
    cost = (
        Decimal(input_tokens) / million * price_per_m_input
        + Decimal(output_tokens) / million * price_per_m_output
    )
    return cost.quantize(Decimal("0.0001"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Result dataclass
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class LlmCallResult:
    """Immutable result of one LLM analyze() call.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Structured analysis validated from the model's tool-call input.
    analysis: MeetKevinAnalysis
    # Plain-dict record of the call: stop reason, tool name, tool input and
    # token usage as assembled by LlmAnalyzer.analyze().
    raw_response: dict
    # Input-side token count reported for the call.
    prompt_tokens: int
    # Output-side token count reported for the call.
    completion_tokens: int
    # Notional USD cost as returned by compute_cost_usd() for this call.
    cost_usd: Decimal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Analyzer class
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# Upper bound on how many transcript segments are rendered into the user
# message; segments past this count are not included in the prompt.
_MAX_SEGMENTS = 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LlmAnalyzer:
    """Calls Claude (via native Anthropic SDK) to extract structured analysis from a video transcript.

    The model is forced to answer with a single ``submit_analysis`` tool call,
    whose input is validated into a MeetKevinAnalysis.

    Args:
        client: Configured AsyncAnthropic client with OAuth bearer token.
        model: Model identifier (e.g. "claude-sonnet-4-5").
        prompt_version: Prompt version string stored in kevin_analyses.
    """

    def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None:
        self._client = client
        self._model = model
        self._prompt_version = prompt_version

    async def analyze(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict],
    ) -> LlmCallResult:
        """Run LLM analysis on a transcript and return a structured result.

        Args:
            title: Video title.
            description: Video description (may be empty).
            published_at: UTC publication timestamp.
            transcript_text: Full concatenated transcript text.
            transcript_segments: List of {start, end, text} dicts.

        Returns:
            LlmCallResult with parsed MeetKevinAnalysis and token accounting.
            ``prompt_tokens`` is the total input-side count: uncached input
            tokens plus tokens written to and read from the prompt cache.
            ``cost_usd`` prices that total at the model's list input rate, so
            it is a notional figure rather than the exact cache-adjusted bill.

        Raises:
            ValueError: If the response contains no tool_use block.
            pydantic.ValidationError: If tool input fails schema validation.
        """
        user_msg = self._build_user_message(
            title=title,
            description=description,
            published_at=published_at,
            transcript_text=transcript_text,
            transcript_segments=transcript_segments,
        )

        response = await self._client.messages.create(
            model=self._model,
            max_tokens=4096,
            system=[
                {"type": "text", "text": SYSTEM_PROMPT, "cache_control": {"type": "ephemeral"}}
            ],
            tools=[_ANALYSIS_TOOL],
            tool_choice={"type": "tool", "name": "submit_analysis"},
            messages=[{"role": "user", "content": user_msg}],
        )

        # Find the first tool_use block in the response
        tool_use_block = next(
            (block for block in response.content if block.type == "tool_use"),
            None,
        )
        if tool_use_block is None:
            raise ValueError(
                "LLM response contained no tool_use block (expected submit_analysis call)"
            )

        if response.stop_reason == "max_tokens":
            # A truncated tool call usually surfaces as a validation error
            # below; log the real cause first so it is not misdiagnosed.
            logger.warning(
                "LlmAnalyzer.analyze: response stopped at max_tokens for model %r — "
                "tool input may be truncated",
                self._model,
            )

        tool_input: dict = tool_use_block.input
        analysis = MeetKevinAnalysis.model_validate(tool_input)

        usage = response.usage
        uncached_input_tokens: int = usage.input_tokens
        # With cache_control set on the system block, `input_tokens` excludes
        # whatever was written to or read from the prompt cache, so those
        # counters must be added to get the real prompt size.
        cache_creation_tokens = self._token_count(
            getattr(usage, "cache_creation_input_tokens", None)
        )
        cache_read_tokens = self._token_count(
            getattr(usage, "cache_read_input_tokens", None)
        )
        prompt_tokens: int = uncached_input_tokens + cache_creation_tokens + cache_read_tokens
        completion_tokens: int = usage.output_tokens
        cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)

        raw_response: dict = {
            "stop_reason": response.stop_reason,
            "tool_name": tool_use_block.name,
            "tool_input": tool_input,
            "usage": {
                "input_tokens": uncached_input_tokens,
                "output_tokens": completion_tokens,
                "cache_creation_input_tokens": cache_creation_tokens,
                "cache_read_input_tokens": cache_read_tokens,
            },
        }

        return LlmCallResult(
            analysis=analysis,
            raw_response=raw_response,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cost_usd=cost_usd,
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _token_count(value: Any) -> int:
        """Return *value* if it is a real integer count, otherwise 0."""
        if isinstance(value, int) and not isinstance(value, bool):
            return value
        return 0

    @staticmethod
    def _format_segment(seg: dict) -> str:
        """Render one segment as ``[<N>s] <text>``; return "" when it has no text."""
        text = str(seg.get("text") or "").strip()
        if not text:
            return ""
        try:
            start = int(seg.get("start") or 0)
        except (TypeError, ValueError, OverflowError):
            # An unusable start offset should not lose the spoken text.
            start = 0
        return f"[{start}s] {text}"

    def _build_user_message(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict],
    ) -> str:
        """Build the user-turn message for the API call.

        Timestamped segments are preferred (at most _MAX_SEGMENTS of them).
        If no segment has any text, the plain transcript text is used, and if
        that is empty too a placeholder line is emitted.

        Args:
            title: Video title.
            description: Video description; omitted from the message when empty.
            published_at: Publication timestamp. Timezone-aware values are
                converted to UTC before formatting; naive values are
                formatted as given.
            transcript_text: Full concatenated transcript text.
            transcript_segments: List of {start, end, text} dicts.

        Returns:
            The newline-joined message text.
        """
        from datetime import timezone

        if published_at.tzinfo is not None and published_at.utcoffset() is not None:
            # The label below says "UTC", so make the value actually UTC.
            published_at = published_at.astimezone(timezone.utc)

        parts: list[str] = [
            f"Title: {title}",
            f"Published: {published_at.strftime('%Y-%m-%d %H:%M UTC')}",
        ]
        if description:
            parts.append(f"Description: {description}")

        parts.append("")  # blank line before transcript

        segments = transcript_segments or []
        if len(segments) > _MAX_SEGMENTS:
            logger.warning(
                "LlmAnalyzer: transcript has %d segments; only the first %d are sent",
                len(segments),
                _MAX_SEGMENTS,
            )
        segment_lines = [
            line
            for line in map(self._format_segment, segments[:_MAX_SEGMENTS])
            if line
        ]

        if segment_lines:
            # Prefer timestamped segments (up to _MAX_SEGMENTS)
            parts.append("Transcript (with timestamps):")
            parts.extend(segment_lines)
        elif transcript_text:
            parts.append("Transcript:")
            parts.append(transcript_text)
        else:
            parts.append("Transcript: (no transcript available)")

        return "\n".join(parts)
|