# services/meet_kevin_watcher/llm_analyzer.py
"""Claude LLM analyzer for Meet Kevin video transcripts.

Calls Claude Sonnet 4.6 with tool-use forcing to extract structured
MeetKevinAnalysis from a video transcript. Uses prompt caching on the
system block to reduce cost across videos processed within the same
5-minute window.

Public API:
    SYSTEM_PROMPT       — module-level analyst instructions
    compute_cost_usd()  — Decimal-precise cost from token counts
    LlmCallResult       — frozen dataclass returned by analyze()
    LlmAnalyzer         — async class; .analyze() does the API call
"""

import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from decimal import Decimal
from typing import Any

from anthropic import AsyncAnthropic

from shared.schemas.meet_kevin import MeetKevinAnalysis

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Pricing table (USD per 1 000 000 tokens: input, output)
# ---------------------------------------------------------------------------

_PRICING: dict[str, tuple[Decimal, Decimal]] = {
    "claude-sonnet-4-6": (Decimal("3"), Decimal("15")),
    "claude-opus-4-7": (Decimal("15"), Decimal("75")),
    "claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
}

# Prompt-cache traffic is billed as a multiple of the base input price.
# NOTE(review): values follow Anthropic's published rates for the 5-minute
# ("ephemeral") cache — confirm against the current pricing page.
_CACHE_WRITE_MULTIPLIER = Decimal("1.25")
_CACHE_READ_MULTIPLIER = Decimal("0.1")

_TOKENS_PER_MILLION = Decimal("1000000")
_COST_QUANTUM = Decimal("0.0001")

# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------

SYSTEM_PROMPT = """
You are a professional financial analyst specialising in retail investor sentiment.
Your task is to read the full transcript of a Meet Kevin (Kevin Paffrath) YouTube
video and extract a structured investment analysis from it.

## Your mission

Read the transcript carefully and produce a single, precise call to the
`submit_analysis` tool. Do **not** respond with prose — your entire output must be
that one tool call with all required fields filled in correctly.

## What to extract

### Market outlook
Identify the overall market direction Kevin is expressing: bullish, bearish, neutral,
or mixed. Write a concise `market_outlook_reasoning` (2–4 sentences) that explains
*why* you assigned that direction, grounded in specific statements from the video.

### Macro themes
List the 2–6 highest-level economic or policy themes Kevin discusses (e.g.
"Federal Reserve rate path", "AI capex cycle", "commercial real estate stress",
"dollar strength", "energy transition"). These should be phrase-length labels, not
full sentences.

### Key risks
List the 2–5 principal downside risks Kevin flags. Again, short phrase labels, not
paragraphs. Only include risks Kevin explicitly names or clearly implies — do not
invent risks he did not discuss.

### Summary
Write a ~200-word plain-English summary of the video's investment thesis. Focus on
actionable takeaways and any specific catalysts Kevin mentions. Avoid filler phrases
like "In this video Kevin discusses…" — start directly with the insight.

### Per-ticker mentions (tickers field)
Extract every stock, ETF, or crypto ticker that Kevin makes a substantive statement
about. For each one, fill in the following:

- **symbol** — The uppercase ticker symbol (e.g. "NVDA", "SPY", "BTC"). If Kevin
  mentions the company name but not the ticker, infer the ticker from the name (e.g.
  "Nvidia" → "NVDA"). Max 6 characters. Only include tickers you are confident about.

- **action** — The clearest action signal you can infer from what Kevin says. Use
  exactly one of: `buy`, `sell`, `hold`, `watch`, `avoid`. If Kevin expresses
  interest but no clear directional view, use `watch`. If he says he is exiting or
  would not touch it, use `sell` or `avoid` respectively. Do not default to `hold`
  just because you are unsure — skip the ticker instead.

- **conviction** — A float between 0.0 and 1.0 representing how confident Kevin
  sounds. Use 0.8–1.0 for "I'm buying this aggressively / this is my top pick",
  0.5–0.7 for a clear directional view with some hedging, 0.2–0.4 for a tentative
  or heavily-caveated take. A ticker Kevin mentions only in passing (< 20 words of
  commentary) should be **skipped entirely** rather than assigned low conviction.

- **time_horizon** — Pick the closest match from: `intraday`, `days`, `weeks`,
  `months`, `long_term`, `unspecified`. If Kevin does not say, use `unspecified`.

- **rationale_quote** — A short verbatim or lightly paraphrased quote (20–80 words)
  from the transcript that best justifies the action you assigned. Include enough
  context to be meaningful on its own.

- **video_timestamp_seconds** — If the transcript includes segment timestamps (lines
  formatted as `[<seconds>s] <text>`), set this to the integer second where Kevin first
  makes the substantive statement about this ticker. If no timestamps are available,
  set to null.

## Rules for ticker inclusion

1. **Skip tickers mentioned only in passing.** Kevin often references tickers as
   examples or comparisons without making any recommendation. If he says fewer than
   ~20 words about a ticker with no clear directional signal, omit it from `tickers`.

2. **Do not duplicate tickers.** If Kevin mentions the same ticker multiple times,
   merge the signals into a single entry that represents his overall view from the
   video. Use the timestamp of the *first* substantive mention.

3. **Symbols only, no company names.** The `symbol` field must be a ticker, not a
   company name. "Nvidia" is wrong; "NVDA" is correct.

4. **Conviction scores are comparative.** Calibrate them relative to each other
   within the video — Kevin's "top conviction" pick in a video might be 0.85, while
   a hedged mention is 0.45.

## Quality checklist (review before calling submit_analysis)

- [ ] `market_outlook_direction` is one of: bullish, neutral, bearish, mixed
- [ ] `macro_themes` has 2–6 items, each a concise phrase
- [ ] `key_risks` has 2–5 items, each a concise phrase
- [ ] `summary` is approximately 200 words
- [ ] Every ticker in `tickers` has a clear actionable signal (no "I'm not sure")
- [ ] Tickers mentioned only in passing are omitted
- [ ] `conviction` values are floats in [0.0, 1.0]
- [ ] `time_horizon` is one of the six allowed values
- [ ] `rationale_quote` is grounded in something Kevin actually said
- [ ] You are calling `submit_analysis` exactly once with all required fields

Now read the transcript provided in the user message and call `submit_analysis`.
""".strip()

# ---------------------------------------------------------------------------
# Tool definition (JSON Schema mirroring MeetKevinAnalysis)
# ---------------------------------------------------------------------------

# Single source of truth for the tool name: used by both the tool definition
# and the forced tool_choice so the two can never drift apart.
_TOOL_NAME = "submit_analysis"

_ANALYSIS_TOOL: dict[str, Any] = {
    "name": _TOOL_NAME,
    "description": (
        "Submit a structured analysis of a Meet Kevin video transcript. "
        "Call this exactly once with all fields filled in."
    ),
    "input_schema": {
        "type": "object",
        "required": [
            "market_outlook_direction",
            "market_outlook_reasoning",
            "macro_themes",
            "key_risks",
            "summary",
            "tickers",
        ],
        "properties": {
            "market_outlook_direction": {
                "type": "string",
                "enum": ["bullish", "neutral", "bearish", "mixed"],
                "description": "Overall market sentiment direction",
            },
            "market_outlook_reasoning": {
                "type": "string",
                "description": "2-4 sentence explanation of the market outlook direction",
            },
            "macro_themes": {
                "type": "array",
                "items": {"type": "string"},
                "description": "2-6 high-level macro economic themes discussed",
            },
            "key_risks": {
                "type": "array",
                "items": {"type": "string"},
                "description": "2-5 principal downside risks Kevin mentions",
            },
            "summary": {
                "type": "string",
                "description": "~200-word plain-English investment thesis summary",
            },
            "tickers": {
                "type": "array",
                "description": "Per-ticker mentions with action and conviction",
                "items": {
                    "type": "object",
                    "required": [
                        "symbol",
                        "action",
                        "conviction",
                        "time_horizon",
                        "rationale_quote",
                        "video_timestamp_seconds",
                    ],
                    "properties": {
                        "symbol": {
                            "type": "string",
                            "description": "Uppercase ticker symbol (1-6 chars)",
                        },
                        "action": {
                            "type": "string",
                            "enum": ["buy", "sell", "hold", "watch", "avoid"],
                            "description": "Recommendation action",
                        },
                        "conviction": {
                            "type": "number",
                            "minimum": 0.0,
                            "maximum": 1.0,
                            "description": "Confidence in recommendation (0.0-1.0)",
                        },
                        "time_horizon": {
                            "type": "string",
                            "enum": [
                                "intraday",
                                "days",
                                "weeks",
                                "months",
                                "long_term",
                                "unspecified",
                            ],
                            "description": "Time horizon for the recommendation",
                        },
                        "rationale_quote": {
                            "type": "string",
                            "description": "Short verbatim or paraphrased quote from video",
                        },
                        "video_timestamp_seconds": {
                            "type": ["integer", "null"],
                            "description": "Timestamp in seconds for deep-link target",
                        },
                    },
                },
            },
        },
    },
}

# ---------------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------------


def compute_cost_usd(
    model: str,
    input_tokens: int,
    output_tokens: int,
    *,
    cache_creation_input_tokens: int = 0,
    cache_read_input_tokens: int = 0,
) -> Decimal:
    """Compute LLM call cost in USD using pinned per-model pricing.

    Args:
        model: Model identifier string (must be a key in _PRICING).
        input_tokens: Number of uncached input/prompt tokens consumed.
        output_tokens: Number of output/completion tokens generated.
        cache_creation_input_tokens: Tokens written to the prompt cache,
            billed at ``_CACHE_WRITE_MULTIPLIER`` times the input price.
        cache_read_input_tokens: Tokens served from the prompt cache,
            billed at ``_CACHE_READ_MULTIPLIER`` times the input price.

    Returns:
        Cost as a Decimal quantized to four decimal places. Returns
        Decimal("0") for unknown models (logs warning).
    """
    pricing = _PRICING.get(model)
    if pricing is None:
        logger.warning("compute_cost_usd: unknown model %r — returning zero cost", model)
        return Decimal("0")

    price_per_m_input, price_per_m_output = pricing

    # Fold cache traffic into an "input-price-equivalent" token count so the
    # per-million division happens exactly once.
    weighted_input = (
        Decimal(input_tokens)
        + Decimal(cache_creation_input_tokens) * _CACHE_WRITE_MULTIPLIER
        + Decimal(cache_read_input_tokens) * _CACHE_READ_MULTIPLIER
    )
    cost = (
        weighted_input * price_per_m_input
        + Decimal(output_tokens) * price_per_m_output
    ) / _TOKENS_PER_MILLION
    return cost.quantize(_COST_QUANTUM)


# ---------------------------------------------------------------------------
# Result dataclass
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class LlmCallResult:
    """Immutable result of one LLM analyze() call."""

    # Validated structured analysis parsed from the tool_use input.
    analysis: MeetKevinAnalysis
    # Plain-dict snapshot of the response: stop reason, tool name/input, usage.
    raw_response: dict[str, Any]
    # ``usage.input_tokens`` as reported by the API.
    prompt_tokens: int
    # ``usage.output_tokens`` as reported by the API.
    completion_tokens: int
    # Cost of the call, including any prompt-cache traffic.
    cost_usd: Decimal


# ---------------------------------------------------------------------------
# Analyzer class
# ---------------------------------------------------------------------------

_MAX_SEGMENTS = 1000
_MAX_OUTPUT_TOKENS = 4096


def _token_count(value: Any) -> int:
    """Coerce an optional usage counter to a non-negative int.

    Anything that is not a real positive integer (a missing attribute,
    ``None``, or an unexpected type) is treated as zero.
    """
    if isinstance(value, int) and not isinstance(value, bool) and value > 0:
        return value
    return 0


def _format_segment_lines(segments: list[dict]) -> list[str]:
    """Render transcript segments as ``[<seconds>s] <text>`` lines.

    Segments whose text is missing or blank are skipped. A missing or
    unparseable start time falls back to 0 rather than raising.
    """
    lines: list[str] = []
    for seg in segments:
        text = str(seg.get("text") or "").strip()
        if not text:
            continue
        try:
            start = int(float(seg.get("start") or 0))
        except (TypeError, ValueError):
            start = 0
        lines.append(f"[{start}s] {text}")
    return lines


class LlmAnalyzer:
    """Calls Claude to extract structured analysis from a video transcript.

    Args:
        client: Configured AsyncAnthropic client.
        model: Model identifier (e.g. "claude-sonnet-4-6").
        prompt_version: Prompt version string stored in kevin_analyses.
    """

    def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None:
        self._client = client
        self._model = model
        self._prompt_version = prompt_version

    async def analyze(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict],
    ) -> LlmCallResult:
        """Run Claude analysis on a transcript and return a structured result.

        Args:
            title: Video title.
            description: Video description (may be empty).
            published_at: Publication timestamp (aware values are rendered in UTC).
            transcript_text: Full concatenated transcript text.
            transcript_segments: List of {start, end, text} dicts.

        Returns:
            LlmCallResult with parsed MeetKevinAnalysis and token accounting.

        Raises:
            ValueError: If the response contains no tool_use block.
            pydantic.ValidationError: If tool_use input fails schema validation.
        """
        user_msg = self._build_user_message(
            title=title,
            description=description,
            published_at=published_at,
            transcript_text=transcript_text,
            transcript_segments=transcript_segments,
        )

        response = await self._client.messages.create(
            model=self._model,
            max_tokens=_MAX_OUTPUT_TOKENS,
            system=[
                {
                    "type": "text",
                    "text": SYSTEM_PROMPT,
                    "cache_control": {"type": "ephemeral"},
                }
            ],
            tools=[_ANALYSIS_TOOL],
            tool_choice={"type": "tool", "name": _TOOL_NAME},
            messages=[{"role": "user", "content": user_msg}],
        )

        # Find the first tool_use block
        tool_block = next(
            (b for b in response.content if b.type == "tool_use"),
            None,
        )
        if tool_block is None:
            raise ValueError(
                f"Claude response contained no tool_use block "
                f"(stop_reason={response.stop_reason!r})"
            )

        if response.stop_reason == "max_tokens":
            # A truncated tool call usually fails validation below; log first
            # so the root cause is visible next to the validation error.
            logger.warning(
                "analyze: response hit max_tokens=%d; tool input may be incomplete",
                _MAX_OUTPUT_TOKENS,
            )

        analysis = MeetKevinAnalysis.model_validate(tool_block.input)

        usage = response.usage
        prompt_tokens: int = usage.input_tokens
        completion_tokens: int = usage.output_tokens
        # With cache_control set, cached prompt tokens are reported in separate
        # usage fields and are not part of input_tokens.
        cache_write_tokens = _token_count(
            getattr(usage, "cache_creation_input_tokens", None)
        )
        cache_read_tokens = _token_count(
            getattr(usage, "cache_read_input_tokens", None)
        )
        cost_usd = compute_cost_usd(
            self._model,
            prompt_tokens,
            completion_tokens,
            cache_creation_input_tokens=cache_write_tokens,
            cache_read_input_tokens=cache_read_tokens,
        )

        raw_response: dict[str, Any] = {
            "stop_reason": response.stop_reason,
            "tool_name": tool_block.name,
            "tool_input": tool_block.input,
            "usage": {
                "input_tokens": prompt_tokens,
                "output_tokens": completion_tokens,
                "cache_creation_input_tokens": cache_write_tokens,
                "cache_read_input_tokens": cache_read_tokens,
            },
        }

        return LlmCallResult(
            analysis=analysis,
            raw_response=raw_response,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cost_usd=cost_usd,
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_user_message(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict],
    ) -> str:
        """Build the user-turn message for the API call.

        Timestamped segments are preferred over the plain transcript text.
        At most ``_MAX_SEGMENTS`` segments are sent; a warning is logged when
        the transcript is cut. If no segment carries any text, the plain
        transcript text is used instead.
        """
        # The header is labelled "UTC", so convert aware datetimes first.
        # Naive datetimes are rendered as given.
        if published_at.utcoffset() is not None:
            published_at = published_at.astimezone(timezone.utc)

        parts: list[str] = [
            f"Title: {title}",
            f"Published: {published_at.strftime('%Y-%m-%d %H:%M UTC')}",
        ]
        if description:
            parts.append(f"Description: {description}")

        parts.append("")  # blank line before transcript

        segment_lines: list[str] = []
        if transcript_segments:
            if len(transcript_segments) > _MAX_SEGMENTS:
                logger.warning(
                    "_build_user_message: transcript has %d segments; "
                    "only the first %d are sent to the model",
                    len(transcript_segments),
                    _MAX_SEGMENTS,
                )
            segment_lines = _format_segment_lines(transcript_segments[:_MAX_SEGMENTS])

        if segment_lines:
            parts.append("Transcript (with timestamps):")
            parts.extend(segment_lines)
        elif transcript_text:
            parts.append("Transcript:")
            parts.append(transcript_text)
        else:
            parts.append("Transcript: (no transcript available)")

        return "\n".join(parts)
# tests/services/meet_kevin_watcher/test_llm_analyzer.py
"""Tests for the Claude LLM analyzer (Task 7).

Tests use MagicMock/AsyncMock to avoid real API calls.
"""

import json
from dataclasses import FrozenInstanceError
from datetime import datetime, timezone
from decimal import Decimal
from typing import Any
from unittest.mock import AsyncMock, MagicMock

import pytest

from services.meet_kevin_watcher.llm_analyzer import (
    SYSTEM_PROMPT,
    LlmAnalyzer,
    LlmCallResult,
    compute_cost_usd,
)
from shared.schemas.meet_kevin import (
    MarketOutlook,
    MeetKevinAnalysis,
    TickerAction,
    TimeHorizon,
)


# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------

_DEFAULT_MODEL = "claude-sonnet-4-6"
_PUBLISHED_AT = datetime(2026, 5, 21, tzinfo=timezone.utc)


def _make_anthropic_response(
    tool_input: dict, in_tokens: int = 5000, out_tokens: int = 800
) -> MagicMock:
    """Build a minimal mock of an Anthropic messages.create response."""
    block = MagicMock()
    block.type = "tool_use"
    block.name = "submit_analysis"
    block.input = tool_input

    resp = MagicMock()
    resp.content = [block]
    resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
    resp.stop_reason = "tool_use"
    return resp


def _valid_analysis_input() -> dict:
    """Return a dict that Pydantic can validate into MeetKevinAnalysis."""
    return {
        "market_outlook_direction": "bullish",
        "market_outlook_reasoning": "Strong earnings and low unemployment.",
        "macro_themes": ["Fed pivot", "AI boom"],
        "key_risks": ["Inflation rebound", "Credit crunch"],
        "summary": "Kevin discussed the current bull market and highlighted several tech stocks.",
        "tickers": [
            {
                "symbol": "NVDA",
                "action": "buy",
                "conviction": 0.85,
                "time_horizon": "months",
                "rationale_quote": "AI infrastructure buildout has years to run",
                "video_timestamp_seconds": 320,
            }
        ],
    }


def _make_client(response: Any = None) -> tuple[MagicMock, AsyncMock]:
    """Return a mocked AsyncAnthropic client with messages.create wired up."""
    mock_create = AsyncMock(return_value=response)
    mock_messages = MagicMock()
    mock_messages.create = mock_create

    client = MagicMock()
    client.messages = mock_messages
    return client, mock_create


async def _invoke(
    client: MagicMock,
    *,
    model: str = _DEFAULT_MODEL,
    title: str = "Test",
    description: str = "",
    published_at: datetime = _PUBLISHED_AT,
    transcript_text: str = "",
    transcript_segments: list[dict] | None = None,
) -> LlmCallResult:
    """Build an LlmAnalyzer around *client* and run one analyze() call.

    Centralises the construct-and-call boilerplate so each test states only
    the inputs it actually cares about.
    """
    analyzer = LlmAnalyzer(client=client, model=model, prompt_version="v1")
    return await analyzer.analyze(
        title=title,
        description=description,
        published_at=published_at,
        transcript_text=transcript_text,
        transcript_segments=[] if transcript_segments is None else transcript_segments,
    )


async def _invoke_and_get_kwargs(**overrides: Any) -> dict:
    """Run analyze() against a valid mocked response; return create() kwargs."""
    resp = _make_anthropic_response(_valid_analysis_input())
    client, mock_create = _make_client(resp)
    await _invoke(client, **overrides)
    mock_create.assert_called_once()
    return mock_create.call_args.kwargs


# ---------------------------------------------------------------------------
# compute_cost_usd
# ---------------------------------------------------------------------------


class TestComputeCostUsd:
    """Verify monetary cost calculations using Decimal arithmetic."""

    def test_sonnet_46_pricing(self):
        """claude-sonnet-4-6: $3/M input + $15/M output."""
        # 1M input + 1M output = $3 + $15 = $18
        result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000)
        assert result == Decimal("18.0000")

    def test_opus_47_pricing(self):
        """claude-opus-4-7: $15/M input + $75/M output."""
        result = compute_cost_usd("claude-opus-4-7", 1_000_000, 1_000_000)
        assert result == Decimal("90.0000")

    def test_haiku_45_pricing(self):
        """claude-haiku-4-5-20251001: $1/M input + $5/M output."""
        result = compute_cost_usd("claude-haiku-4-5-20251001", 1_000_000, 1_000_000)
        assert result == Decimal("6.0000")

    def test_unknown_model_returns_zero(self):
        """Unknown model logs warning and returns Decimal('0')."""
        result = compute_cost_usd("unknown-model", 1000, 1000)
        assert result == Decimal("0")

    def test_zero_tokens(self):
        """Zero tokens produce zero cost."""
        result = compute_cost_usd("claude-sonnet-4-6", 0, 0)
        assert result == Decimal("0")

    def test_result_is_decimal(self):
        """Return type is always Decimal, not float."""
        result = compute_cost_usd("claude-sonnet-4-6", 5000, 800)
        assert isinstance(result, Decimal)

    def test_small_realistic_call(self):
        """Realistic 10K input + 1K output token call (Sonnet 4.6)."""
        # input:  10000/1_000_000 * 3  = 0.03000
        # output:  1000/1_000_000 * 15 = 0.01500
        # total:                         0.04500
        result = compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000)
        assert result == Decimal("0.0450")


# ---------------------------------------------------------------------------
# SYSTEM_PROMPT
# ---------------------------------------------------------------------------


class TestSystemPrompt:
    """Verify the system prompt has the required content markers."""

    def test_contains_submit_analysis(self):
        """SYSTEM_PROMPT must reference the tool name 'submit_analysis'."""
        assert "submit_analysis" in SYSTEM_PROMPT

    def test_contains_ticker(self):
        """SYSTEM_PROMPT must mention 'ticker' (case-insensitive)."""
        assert "ticker" in SYSTEM_PROMPT.lower()

    def test_is_substantial(self):
        """SYSTEM_PROMPT should be at least 300 words (analyst guidance)."""
        word_count = len(SYSTEM_PROMPT.split())
        assert word_count >= 300, f"SYSTEM_PROMPT is only {word_count} words"

    def test_mentions_conviction(self):
        """SYSTEM_PROMPT should mention conviction scoring."""
        assert "conviction" in SYSTEM_PROMPT.lower()

    def test_mentions_time_horizon(self):
        """SYSTEM_PROMPT should describe time_horizon field."""
        assert "time_horizon" in SYSTEM_PROMPT or "time horizon" in SYSTEM_PROMPT.lower()


# ---------------------------------------------------------------------------
# LlmCallResult dataclass
# ---------------------------------------------------------------------------


class TestLlmCallResult:
    """Verify LlmCallResult shape and immutability."""

    @staticmethod
    def _make_result(cost: Decimal = Decimal("0.027")) -> LlmCallResult:
        """Build an LlmCallResult around a valid analysis."""
        return LlmCallResult(
            analysis=MeetKevinAnalysis(**_valid_analysis_input()),
            raw_response={"stop_reason": "tool_use"},
            prompt_tokens=5000,
            completion_tokens=800,
            cost_usd=cost,
        )

    def test_is_frozen(self):
        """LlmCallResult must be a frozen dataclass."""
        result = self._make_result()
        # Frozen dataclasses raise FrozenInstanceError (an AttributeError
        # subclass) on assignment.
        with pytest.raises(FrozenInstanceError):
            result.prompt_tokens = 9999  # type: ignore

    def test_fields_accessible(self):
        """All five fields are accessible on LlmCallResult."""
        cost = Decimal("0.027")
        result = self._make_result(cost)
        assert isinstance(result.analysis, MeetKevinAnalysis)
        assert result.raw_response == {"stop_reason": "tool_use"}
        assert result.prompt_tokens == 5000
        assert result.completion_tokens == 800
        assert result.cost_usd == cost


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — happy path
# ---------------------------------------------------------------------------


class TestLlmAnalyzerHappyPath:
    """Happy-path tests for the analyzer."""

    @pytest.mark.asyncio
    async def test_returns_llm_call_result(self):
        """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
        resp = _make_anthropic_response(
            _valid_analysis_input(), in_tokens=5000, out_tokens=800
        )
        client, _ = _make_client(resp)

        result = await _invoke(
            client,
            title="Market Update",
            description="Kevin covers the latest market trends.",
            published_at=datetime(2026, 5, 21, 12, 0, 0, tzinfo=timezone.utc),
            transcript_text="Welcome to today's update. NVDA is looking strong.",
            transcript_segments=[
                {"start": 0.0, "end": 5.0, "text": "Welcome to today's update."},
                {"start": 5.0, "end": 10.0, "text": "NVDA is looking strong."},
            ],
        )

        assert isinstance(result, LlmCallResult)
        assert isinstance(result.analysis, MeetKevinAnalysis)
        assert result.prompt_tokens == 5000
        assert result.completion_tokens == 800

    @pytest.mark.asyncio
    async def test_analysis_fields_parsed_correctly(self):
        """Parsed MeetKevinAnalysis has correct field values from tool input."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, _ = _make_client(resp)

        result = await _invoke(
            client,
            title="Test Video",
            description="Description",
            transcript_text="Some transcript.",
        )

        analysis = result.analysis
        assert analysis.market_outlook_direction == MarketOutlook.BULLISH
        assert analysis.market_outlook_reasoning == "Strong earnings and low unemployment."
        assert "Fed pivot" in analysis.macro_themes
        assert len(analysis.tickers) == 1
        ticker = analysis.tickers[0]
        assert ticker.symbol == "NVDA"
        assert ticker.action == TickerAction.BUY
        assert ticker.conviction == pytest.approx(0.85)
        assert ticker.time_horizon == TimeHorizon.MONTHS

    @pytest.mark.asyncio
    async def test_cost_usd_is_positive(self):
        """cost_usd is calculated and positive for a valid token count."""
        resp = _make_anthropic_response(
            _valid_analysis_input(), in_tokens=10_000, out_tokens=1_000
        )
        client, _ = _make_client(resp)

        result = await _invoke(client)

        assert result.cost_usd > Decimal("0")
        # The mocked usage carries no cache traffic, so the cost must match
        # the plain input/output formula exactly.
        assert result.cost_usd == compute_cost_usd(_DEFAULT_MODEL, 10_000, 1_000)

    @pytest.mark.asyncio
    async def test_api_called_with_tool_choice_forcing(self):
        """messages.create is called with tool_choice forcing submit_analysis."""
        kwargs = await _invoke_and_get_kwargs()
        assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}

    @pytest.mark.asyncio
    async def test_api_called_with_cache_control_on_system(self):
        """System prompt is passed with cache_control: {type: ephemeral}."""
        kwargs = await _invoke_and_get_kwargs()
        system = kwargs["system"]
        assert isinstance(system, list)
        assert len(system) >= 1
        assert system[0]["type"] == "text"
        assert system[0]["text"] == SYSTEM_PROMPT
        assert system[0]["cache_control"] == {"type": "ephemeral"}

    @pytest.mark.asyncio
    async def test_api_called_with_correct_model(self):
        """messages.create is called with the model passed to LlmAnalyzer."""
        kwargs = await _invoke_and_get_kwargs(model="claude-opus-4-7")
        assert kwargs["model"] == "claude-opus-4-7"

    @pytest.mark.asyncio
    async def test_api_called_with_submit_analysis_tool(self):
        """Tool definition includes name='submit_analysis'."""
        kwargs = await _invoke_and_get_kwargs()
        tools = kwargs["tools"]
        assert any(t.get("name") == "submit_analysis" for t in tools)

    @pytest.mark.asyncio
    async def test_raw_response_is_captured(self):
        """raw_response in LlmCallResult holds serializable dict."""
        tool_input = _valid_analysis_input()
        resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800)
        client, _ = _make_client(resp)

        result = await _invoke(client)

        raw = result.raw_response
        assert isinstance(raw, dict)
        assert raw["stop_reason"] == "tool_use"
        assert raw["tool_name"] == "submit_analysis"
        assert raw["tool_input"] == tool_input
        assert raw["usage"]["input_tokens"] == 5000
        assert raw["usage"]["output_tokens"] == 800
        # Must round-trip through JSON (no SDK/mock objects leaking in).
        json.dumps(raw)

    @pytest.mark.asyncio
    async def test_transcript_segments_included_in_user_message(self):
        """User message contains timestamped segment lines from transcript_segments."""
        segments = [
            {"start": 0.0, "end": 5.0, "text": "Hello world."},
            {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
        ]

        kwargs = await _invoke_and_get_kwargs(
            transcript_text="Hello world. Let's talk stocks.",
            transcript_segments=segments,
        )

        user_content = kwargs["messages"][0]["content"]
        # Each segment is rendered with its integer start second as a prefix.
        assert "[0s] Hello world." in user_content
        assert "[5s] Let's talk stocks." in user_content


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — failure paths
# ---------------------------------------------------------------------------


class TestLlmAnalyzerFailurePaths:
    """Failure path tests."""

    @pytest.mark.asyncio
    async def test_no_tool_use_block_raises_value_error(self):
        """If response has no tool_use block, raises ValueError mentioning tool_use."""
        # Response with a text block instead of tool_use
        text_block = MagicMock()
        text_block.type = "text"
        text_block.text = "Here is my analysis..."

        resp = MagicMock()
        resp.content = [text_block]
        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
        resp.stop_reason = "end_turn"

        client, _ = _make_client(resp)

        with pytest.raises(ValueError, match="tool_use"):
            await _invoke(client)

    @pytest.mark.asyncio
    async def test_empty_content_raises_value_error(self):
        """If response content is empty, raises ValueError."""
        resp = MagicMock()
        resp.content = []
        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
        resp.stop_reason = "tool_use"

        client, _ = _make_client(resp)

        with pytest.raises(ValueError, match="tool_use"):
            await _invoke(client)

    @pytest.mark.asyncio
    async def test_invalid_tool_input_raises_validation_error(self):
        """Malformed tool input (invalid enum) raises a validation error."""
        bad_input = _valid_analysis_input()
        bad_input["market_outlook_direction"] = "extremely_bullish"  # not a valid enum

        resp = _make_anthropic_response(bad_input)
        client, _ = _make_client(resp)

        # pydantic v2's ValidationError subclasses ValueError; catching bare
        # Exception here would also pass on unrelated bugs (e.g. TypeError).
        with pytest.raises(ValueError):
            await _invoke(client)