feat(meet-kevin): Claude Sonnet 4.6 LLM analyzer (tool-use forcing + prompt cache)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-21 19:44:57 +00:00 · 2026-05-21 19:44:57 +00:00 · 8309556c00
commit 8309556c00
parent 145f7dbec5
2 changed files with 887 additions and 0 deletions
--- a/services/meet_kevin_watcher/llm_analyzer.py
+++ b/services/meet_kevin_watcher/llm_analyzer.py
@ -0,0 +1,427 @@
+"""Claude LLM analyzer for Meet Kevin video transcripts.
+
+Calls Claude Sonnet 4.6 with tool-use forcing to extract structured
+MeetKevinAnalysis from a video transcript. Uses prompt caching on the
+system block to reduce cost across videos processed within the same
+5-minute window.
+
+Public API:
+  SYSTEM_PROMPT         — module-level analyst instructions
+  compute_cost_usd()    — Decimal-precise cost from token counts
+  LlmCallResult         — frozen dataclass returned by analyze()
+  LlmAnalyzer           — async class; .analyze() does the API call
+"""
+
+import logging
+from dataclasses import dataclass
+from datetime import datetime
+from decimal import Decimal
+from typing import Any
+
+from anthropic import AsyncAnthropic
+
+from shared.schemas.meet_kevin import MeetKevinAnalysis
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Pricing table  (USD per 1 000 000 tokens: input, output)
+# ---------------------------------------------------------------------------
+
+# Rates are written as strings and converted once, so every price is an exact
+# Decimal. Tuple order is (input rate, output rate), both in USD per 1M tokens.
+_PRICING: dict[str, tuple[Decimal, Decimal]] = {
+    model_id: (Decimal(input_rate), Decimal(output_rate))
+    for model_id, input_rate, output_rate in (
+        ("claude-sonnet-4-6", "3", "15"),
+        ("claude-opus-4-7", "15", "75"),
+        ("claude-haiku-4-5-20251001", "1", "5"),
+    )
+}
+
+# ---------------------------------------------------------------------------
+# System prompt
+# ---------------------------------------------------------------------------
+
+# Analyst instructions for the model. LlmAnalyzer.analyze() sends this text as
+# the single system block, tagged with cache_control, on every request.
+# The trailing .strip() removes the newlines that follow the opening quotes and
+# precede the closing quotes, so the prompt starts at "You are" and ends at the
+# final instruction line.
+# The `[<N>s] <text>` timestamp format described below is the one produced by
+# LlmAnalyzer._build_user_message(); keep the two in sync.
+SYSTEM_PROMPT = """
+You are a professional financial analyst specialising in retail investor sentiment.
+Your task is to read the full transcript of a Meet Kevin (Kevin Paffrath) YouTube
+video and extract a structured investment analysis from it.
+
+## Your mission
+
+Read the transcript carefully and produce a single, precise call to the
+`submit_analysis` tool. Do **not** respond with prose — your entire output must be
+that one tool call with all required fields filled in correctly.
+
+## What to extract
+
+### Market outlook
+Identify the overall market direction Kevin is expressing: bullish, bearish, neutral,
+or mixed. Write a concise `market_outlook_reasoning` (2–4 sentences) that explains
+*why* you assigned that direction, grounded in specific statements from the video.
+
+### Macro themes
+List the 2–6 highest-level economic or policy themes Kevin discusses (e.g.
+"Federal Reserve rate path", "AI capex cycle", "commercial real estate stress",
+"dollar strength", "energy transition"). These should be phrase-length labels, not
+full sentences.
+
+### Key risks
+List the 2–5 principal downside risks Kevin flags. Again, short phrase labels, not
+paragraphs. Only include risks Kevin explicitly names or clearly implies — do not
+invent risks he did not discuss.
+
+### Summary
+Write a ~200-word plain-English summary of the video's investment thesis. Focus on
+actionable takeaways and any specific catalysts Kevin mentions. Avoid filler phrases
+like "In this video Kevin discusses…" — start directly with the insight.
+
+### Per-ticker mentions (tickers field)
+Extract every stock, ETF, or crypto ticker that Kevin makes a substantive statement
+about. For each one, fill in the following:
+
+- **symbol** — The uppercase ticker symbol (e.g. "NVDA", "SPY", "BTC"). If Kevin
+  mentions the company name but not the ticker, infer the ticker from the name (e.g.
+  "Nvidia" → "NVDA"). Max 6 characters. Only include tickers you are confident about.
+
+- **action** — The clearest action signal you can infer from what Kevin says. Use
+  exactly one of: `buy`, `sell`, `hold`, `watch`, `avoid`. If Kevin expresses
+  interest but no clear directional view, use `watch`. If he says he is exiting or
+  would not touch it, use `sell` or `avoid` respectively. Do not default to `hold`
+  just because you are unsure — skip the ticker instead.
+
+- **conviction** — A float between 0.0 and 1.0 representing how confident Kevin
+  sounds. Use 0.8–1.0 for "I'm buying this aggressively / this is my top pick",
+  0.5–0.7 for a clear directional view with some hedging, 0.2–0.4 for a tentative
+  or heavily-caveated take. A ticker Kevin mentions only in passing (< 20 words of
+  commentary) should be **skipped entirely** rather than assigned low conviction.
+
+- **time_horizon** — Pick the closest match from: `intraday`, `days`, `weeks`,
+  `months`, `long_term`, `unspecified`. If Kevin does not say, use `unspecified`.
+
+- **rationale_quote** — A short verbatim or lightly paraphrased quote (20–80 words)
+  from the transcript that best justifies the action you assigned. Include enough
+  context to be meaningful on its own.
+
+- **video_timestamp_seconds** — If the transcript includes segment timestamps (lines
+  formatted as `[<N>s] <text>`), set this to the integer second where Kevin first
+  makes the substantive statement about this ticker. If no timestamps are available,
+  set to null.
+
+## Rules for ticker inclusion
+
+1. **Skip tickers mentioned only in passing.** Kevin often references tickers as
+   examples or comparisons without making any recommendation. If he says fewer than
+   ~20 words about a ticker with no clear directional signal, omit it from `tickers`.
+
+2. **Do not duplicate tickers.** If Kevin mentions the same ticker multiple times,
+   merge the signals into a single entry that represents his overall view from the
+   video. Use the timestamp of the *first* substantive mention.
+
+3. **Symbols only, no company names.** The `symbol` field must be a ticker, not a
+   company name. "Nvidia" is wrong; "NVDA" is correct.
+
+4. **Conviction scores are comparative.** Calibrate them relative to each other
+   within the video — Kevin's "top conviction" pick in a video might be 0.85, while
+   a hedged mention is 0.45.
+
+## Quality checklist (review before calling submit_analysis)
+
+- [ ] `market_outlook_direction` is one of: bullish, neutral, bearish, mixed
+- [ ] `macro_themes` has 2–6 items, each a concise phrase
+- [ ] `key_risks` has 2–5 items, each a concise phrase
+- [ ] `summary` is approximately 200 words
+- [ ] Every ticker in `tickers` has a clear actionable signal (no "I'm not sure")
+- [ ] Tickers mentioned only in passing are omitted
+- [ ] `conviction` values are floats in [0.0, 1.0]
+- [ ] `time_horizon` is one of the six allowed values
+- [ ] `rationale_quote` is grounded in something Kevin actually said
+- [ ] You are calling `submit_analysis` exactly once with all required fields
+
+Now read the transcript provided in the user message and call `submit_analysis`.
+""".strip()
+
+# ---------------------------------------------------------------------------
+# Tool definition  (JSON Schema mirroring MeetKevinAnalysis)
+# ---------------------------------------------------------------------------
+
+# Tool definition passed via `tools=` in LlmAnalyzer.analyze(). The call also
+# sets tool_choice to this tool's name, so the model's reply is expected to be a
+# single `submit_analysis` tool_use block whose input is then validated with
+# MeetKevinAnalysis.model_validate().
+# NOTE(review): the item-count and length limits (2-6 themes, 2-5 risks, 1-6 char
+# symbols) appear only in description text and in SYSTEM_PROMPT, not as schema
+# constraints; whether MeetKevinAnalysis enforces them is not visible here.
+_ANALYSIS_TOOL: dict[str, Any] = {
+    "name": "submit_analysis",
+    "description": (
+        "Submit a structured analysis of a Meet Kevin video transcript. "
+        "Call this exactly once with all fields filled in."
+    ),
+    "input_schema": {
+        "type": "object",
+        # Every top-level property is mandatory.
+        "required": [
+            "market_outlook_direction",
+            "market_outlook_reasoning",
+            "macro_themes",
+            "key_risks",
+            "summary",
+            "tickers",
+        ],
+        "properties": {
+            "market_outlook_direction": {
+                "type": "string",
+                "enum": ["bullish", "neutral", "bearish", "mixed"],
+                "description": "Overall market sentiment direction",
+            },
+            "market_outlook_reasoning": {
+                "type": "string",
+                "description": "2-4 sentence explanation of the market outlook direction",
+            },
+            "macro_themes": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "2-6 high-level macro economic themes discussed",
+            },
+            "key_risks": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "2-5 principal downside risks Kevin mentions",
+            },
+            "summary": {
+                "type": "string",
+                "description": "~200-word plain-English investment thesis summary",
+            },
+            "tickers": {
+                "type": "array",
+                "description": "Per-ticker mentions with action and conviction",
+                "items": {
+                    "type": "object",
+                    # All six per-ticker fields are mandatory; the timestamp is
+                    # required too but may be null (see its type below).
+                    "required": [
+                        "symbol",
+                        "action",
+                        "conviction",
+                        "time_horizon",
+                        "rationale_quote",
+                        "video_timestamp_seconds",
+                    ],
+                    "properties": {
+                        "symbol": {
+                            "type": "string",
+                            "description": "Uppercase ticker symbol (1-6 chars)",
+                        },
+                        "action": {
+                            "type": "string",
+                            "enum": ["buy", "sell", "hold", "watch", "avoid"],
+                            "description": "Recommendation action",
+                        },
+                        "conviction": {
+                            "type": "number",
+                            "minimum": 0.0,
+                            "maximum": 1.0,
+                            "description": "Confidence in recommendation (0.0-1.0)",
+                        },
+                        "time_horizon": {
+                            "type": "string",
+                            "enum": [
+                                "intraday",
+                                "days",
+                                "weeks",
+                                "months",
+                                "long_term",
+                                "unspecified",
+                            ],
+                            "description": "Time horizon for the recommendation",
+                        },
+                        "rationale_quote": {
+                            "type": "string",
+                            "description": "Short verbatim or paraphrased quote from video",
+                        },
+                        "video_timestamp_seconds": {
+                            # Nullable: SYSTEM_PROMPT tells the model to send null
+                            # when the transcript carries no segment timestamps.
+                            "type": ["integer", "null"],
+                            "description": "Timestamp in seconds for deep-link target",
+                        },
+                    },
+                },
+            },
+        },
+    },
+}
+
+# ---------------------------------------------------------------------------
+# Public helpers
+# ---------------------------------------------------------------------------
+
+
+def compute_cost_usd(
+    model: str,
+    input_tokens: int,
+    output_tokens: int,
+    cache_creation_input_tokens: int = 0,
+    cache_read_input_tokens: int = 0,
+) -> Decimal:
+    """Compute LLM call cost in USD using pinned per-model pricing.
+
+    Prompt-cache traffic is billed relative to the model's base input rate:
+    tokens written to the 5-minute cache cost 1.25x and tokens read from the
+    cache cost 0.1x (Anthropic prompt-caching pricing). Both cache arguments
+    default to 0, so existing three-argument calls return the same value as
+    before.
+
+    Args:
+        model: Model identifier string (must be a key in _PRICING).
+        input_tokens: Number of uncached input/prompt tokens consumed.
+        output_tokens: Number of output/completion tokens generated.
+        cache_creation_input_tokens: Input tokens written to the prompt cache.
+        cache_read_input_tokens: Input tokens served from the prompt cache.
+
+    Returns:
+        Cost as a Decimal quantized to four decimal places. Returns
+        Decimal("0") for unknown models (logs warning).
+    """
+    pricing = _PRICING.get(model)
+    if pricing is None:
+        logger.warning("compute_cost_usd: unknown model %r — returning zero cost", model)
+        return Decimal("0")
+
+    price_per_m_input, price_per_m_output = pricing
+    million = Decimal("1000000")
+
+    # Fold cache traffic into an "input-rate equivalent" token count so a single
+    # multiplication by the input price covers all three input categories.
+    cache_write_multiplier = Decimal("1.25")
+    cache_read_multiplier = Decimal("0.1")
+    billable_input = (
+        Decimal(input_tokens)
+        + Decimal(cache_creation_input_tokens) * cache_write_multiplier
+        + Decimal(cache_read_input_tokens) * cache_read_multiplier
+    )
+
+    cost = (
+        billable_input / million * price_per_m_input
+        + Decimal(output_tokens) / million * price_per_m_output
+    )
+    return cost.quantize(Decimal("0.0001"))
+
+
+# ---------------------------------------------------------------------------
+# Result dataclass
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class LlmCallResult:
+    """Immutable result of one LLM analyze() call.
+
+    ``frozen=True`` only blocks attribute reassignment; the ``raw_response``
+    dict itself can still be mutated in place.
+    """
+
+    # Validated structured output parsed from the tool call's input.
+    analysis: MeetKevinAnalysis
+    # Subset of the API response kept by analyze(): stop_reason, tool_name,
+    # tool_input and usage.
+    raw_response: dict
+    # usage.input_tokens as reported on the response.
+    prompt_tokens: int
+    # usage.output_tokens as reported on the response.
+    completion_tokens: int
+    # Result of compute_cost_usd() for this call.
+    cost_usd: Decimal
+
+
+# ---------------------------------------------------------------------------
+# Analyzer class
+# ---------------------------------------------------------------------------
+
+# Cap on how many timestamped segments LlmAnalyzer._build_user_message() puts in
+# the user message; segments beyond this index are not sent to the model.
+_MAX_SEGMENTS = 1000
+
+
+class LlmAnalyzer:
+    """Calls Claude to extract structured analysis from a video transcript.
+
+    Args:
+        client: Configured AsyncAnthropic client.
+        model: Model identifier (e.g. "claude-sonnet-4-6").
+        prompt_version: Prompt version string stored in kevin_analyses.
+    """
+
+    def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None:
+        self._client = client
+        self._model = model
+        # Kept on the instance only; nothing in this class reads it or sends it
+        # to the API.
+        self._prompt_version = prompt_version
+
+    async def analyze(
+        self,
+        *,
+        title: str,
+        description: str,
+        published_at: datetime,
+        transcript_text: str,
+        transcript_segments: list[dict],
+    ) -> LlmCallResult:
+        """Run Claude analysis on a transcript and return a structured result.
+
+        The request forces a `submit_analysis` tool call and marks the system
+        prompt with ``cache_control`` so it can be served from the prompt cache.
+
+        Args:
+            title: Video title.
+            description: Video description (may be empty).
+            published_at: Publication timestamp; aware values are converted to
+                UTC, naive values are treated as already being UTC.
+            transcript_text: Full concatenated transcript text.
+            transcript_segments: List of {start, end, text} dicts.
+
+        Returns:
+            LlmCallResult with parsed MeetKevinAnalysis and token accounting.
+            ``raw_response["usage"]`` also carries the prompt-cache token
+            counts (0 when the response does not report them).
+
+        Raises:
+            ValueError: If the response contains no tool_use block.
+            pydantic.ValidationError: If tool_use input fails schema validation.
+        """
+        user_msg = self._build_user_message(
+            title=title,
+            description=description,
+            published_at=published_at,
+            transcript_text=transcript_text,
+            transcript_segments=transcript_segments,
+        )
+
+        response = await self._client.messages.create(
+            model=self._model,
+            max_tokens=4096,
+            system=[
+                {
+                    "type": "text",
+                    "text": SYSTEM_PROMPT,
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ],
+            tools=[_ANALYSIS_TOOL],
+            tool_choice={"type": "tool", "name": "submit_analysis"},
+            messages=[{"role": "user", "content": user_msg}],
+        )
+
+        if response.stop_reason == "max_tokens":
+            # The output budget ran out mid-reply, so the tool input may be cut
+            # short. Validation below still decides whether it is usable; this
+            # log line just makes the root cause visible.
+            logger.warning(
+                "analyze: response stopped at max_tokens; tool input may be truncated (model=%s)",
+                self._model,
+            )
+
+        # Find the first tool_use block
+        tool_block = next(
+            (b for b in response.content if b.type == "tool_use"),
+            None,
+        )
+        if tool_block is None:
+            raise ValueError(
+                f"Claude response contained no tool_use block "
+                f"(stop_reason={response.stop_reason!r})"
+            )
+
+        analysis = MeetKevinAnalysis.model_validate(tool_block.input)
+
+        usage = response.usage
+        prompt_tokens: int = usage.input_tokens
+        completion_tokens: int = usage.output_tokens
+        # NOTE(review): cost_usd prices input_tokens/output_tokens only. Per the
+        # Anthropic usage docs, tokens written to or read from the prompt cache
+        # are reported in separate usage fields, so they are recorded in
+        # raw_response below to keep later cost reconciliation possible.
+        cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)
+
+        raw_response: dict = {
+            "stop_reason": response.stop_reason,
+            "tool_name": tool_block.name,
+            "tool_input": tool_block.input,
+            "usage": {
+                "input_tokens": prompt_tokens,
+                "output_tokens": completion_tokens,
+                "cache_creation_input_tokens": self._optional_token_count(
+                    usage, "cache_creation_input_tokens"
+                ),
+                "cache_read_input_tokens": self._optional_token_count(
+                    usage, "cache_read_input_tokens"
+                ),
+            },
+        }
+
+        return LlmCallResult(
+            analysis=analysis,
+            raw_response=raw_response,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            cost_usd=cost_usd,
+        )
+
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _optional_token_count(usage: Any, field: str) -> int:
+        """Return an optional integer usage field, or 0 when absent or not an int."""
+        value = getattr(usage, field, None)
+        return value if isinstance(value, int) else 0
+
+    def _build_user_message(
+        self,
+        *,
+        title: str,
+        description: str,
+        published_at: datetime,
+        transcript_text: str,
+        transcript_segments: list[dict],
+    ) -> str:
+        """Build the user-turn message for the API call.
+
+        Layout: a ``Title:`` line, a ``Published:`` line, an optional
+        ``Description:`` line, a blank line, then the transcript. Timestamped
+        segments are preferred (one ``[<N>s] <text>`` line each, capped at
+        _MAX_SEGMENTS); otherwise the plain transcript text is used; otherwise
+        a "no transcript available" marker is emitted.
+
+        Args:
+            title: Video title.
+            description: Video description; omitted from the message when empty.
+            published_at: Publication timestamp (aware values are normalised to
+                UTC before formatting).
+            transcript_text: Full transcript text, used when there are no segments.
+            transcript_segments: List of {start, end, text} dicts.
+
+        Returns:
+            The newline-joined message string.
+        """
+        # Local import: the module header only imports `datetime` itself.
+        from datetime import timezone
+
+        # The header line is labelled "UTC", so an aware timestamp in another
+        # zone must be converted first; a naive one is taken to be UTC already.
+        if published_at.utcoffset() is not None:
+            published_at = published_at.astimezone(timezone.utc)
+
+        parts: list[str] = [
+            f"Title: {title}",
+            f"Published: {published_at.strftime('%Y-%m-%d %H:%M UTC')}",
+        ]
+        if description:
+            parts.append(f"Description: {description}")
+
+        parts.append("")  # blank line before transcript
+
+        if transcript_segments:
+            if len(transcript_segments) > _MAX_SEGMENTS:
+                # Make the truncation visible instead of silently dropping the tail.
+                logger.warning(
+                    "_build_user_message: transcript has %d segments; only the first %d are sent",
+                    len(transcript_segments),
+                    _MAX_SEGMENTS,
+                )
+            # Prefer timestamped segments (up to _MAX_SEGMENTS). `or` fallbacks
+            # cover keys that are present but set to None.
+            parts.append("Transcript (with timestamps):")
+            parts.extend(
+                f"[{int(seg.get('start') or 0)}s] {(seg.get('text') or '').strip()}"
+                for seg in transcript_segments[:_MAX_SEGMENTS]
+            )
+        elif transcript_text:
+            parts.append("Transcript:")
+            parts.append(transcript_text)
+        else:
+            parts.append("Transcript: (no transcript available)")
+
+        return "\n".join(parts)
--- a/tests/services/meet_kevin_watcher/test_llm_analyzer.py
+++ b/tests/services/meet_kevin_watcher/test_llm_analyzer.py
@ -0,0 +1,460 @@
+"""Tests for the Claude LLM analyzer (Task 7).
+
+Tests use MagicMock/AsyncMock to avoid real API calls.
+"""
+
+from datetime import datetime, timezone
+from decimal import Decimal
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from services.meet_kevin_watcher.llm_analyzer import (
+    SYSTEM_PROMPT,
+    LlmAnalyzer,
+    LlmCallResult,
+    compute_cost_usd,
+)
+from shared.schemas.meet_kevin import (
+    MarketOutlook,
+    MeetKevinAnalysis,
+    TickerAction,
+    TimeHorizon,
+)
+
+
+# ---------------------------------------------------------------------------
+# Test helpers
+# ---------------------------------------------------------------------------
+
+def _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800):
+    """Return a mock messages.create response holding one submit_analysis tool_use block."""
+    tool_use = MagicMock(type="tool_use", input=tool_input)
+    # `name` cannot be passed to the constructor (that would name the mock
+    # itself), so it is assigned as a plain attribute afterwards.
+    tool_use.name = "submit_analysis"
+
+    token_usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
+    return MagicMock(content=[tool_use], usage=token_usage, stop_reason="tool_use")
+
+
+def _valid_analysis_input() -> dict:
+    """Build a fresh tool-input payload that validates as MeetKevinAnalysis."""
+    nvda_mention = {
+        "symbol": "NVDA",
+        "action": "buy",
+        "conviction": 0.85,
+        "time_horizon": "months",
+        "rationale_quote": "AI infrastructure buildout has years to run",
+        "video_timestamp_seconds": 320,
+    }
+    payload = {
+        "market_outlook_direction": "bullish",
+        "market_outlook_reasoning": "Strong earnings and low unemployment.",
+        "macro_themes": ["Fed pivot", "AI boom"],
+        "key_risks": ["Inflation rebound", "Credit crunch"],
+        "summary": "Kevin discussed the current bull market and highlighted several tech stocks.",
+        "tickers": [nvda_mention],
+    }
+    return payload
+
+
+def _make_client(response=None):
+    """Build a mock AsyncAnthropic client; return it with its messages.create AsyncMock."""
+    create_call = AsyncMock(return_value=response)
+
+    client = MagicMock()
+    # Accessing client.messages auto-creates the intermediate MagicMock.
+    client.messages.create = create_call
+    return client, create_call
+
+
+# ---------------------------------------------------------------------------
+# compute_cost_usd
+# ---------------------------------------------------------------------------
+
+
+class TestComputeCostUsd:
+    """Check compute_cost_usd() per priced model and for the edge cases."""
+
+    def test_sonnet_46_pricing(self):
+        """Sonnet 4.6: 1M input ($3) plus 1M output ($15) totals $18."""
+        expected = Decimal("18.0000")
+        assert compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000) == expected
+
+    def test_opus_47_pricing(self):
+        """Opus 4.7: 1M input ($15) plus 1M output ($75) totals $90."""
+        expected = Decimal("90.0000")
+        assert compute_cost_usd("claude-opus-4-7", 1_000_000, 1_000_000) == expected
+
+    def test_haiku_45_pricing(self):
+        """Haiku 4.5: 1M input ($1) plus 1M output ($5) totals $6."""
+        expected = Decimal("6.0000")
+        assert compute_cost_usd("claude-haiku-4-5-20251001", 1_000_000, 1_000_000) == expected
+
+    def test_unknown_model_returns_zero(self):
+        """A model missing from the pricing table yields Decimal('0')."""
+        cost = compute_cost_usd("unknown-model", 1000, 1000)
+        assert cost == Decimal("0")
+
+    def test_zero_tokens(self):
+        """No tokens means no cost."""
+        cost = compute_cost_usd("claude-sonnet-4-6", 0, 0)
+        assert cost == Decimal("0")
+
+    def test_result_is_decimal(self):
+        """The result is a Decimal instance, never a float."""
+        cost = compute_cost_usd("claude-sonnet-4-6", 5000, 800)
+        assert isinstance(cost, Decimal)
+
+    def test_small_realistic_call(self):
+        """A 10K-input / 1K-output Sonnet 4.6 call costs $0.0450."""
+        # 10_000 / 1_000_000 * 3  = 0.0300 (input)
+        #  1_000 / 1_000_000 * 15 = 0.0150 (output)
+        expected = Decimal("0.0450")
+        assert compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000) == expected
+
+
+# ---------------------------------------------------------------------------
+# SYSTEM_PROMPT
+# ---------------------------------------------------------------------------
+
+
+class TestSystemPrompt:
+    """Check that SYSTEM_PROMPT carries the expected content markers."""
+
+    def test_contains_submit_analysis(self):
+        """The prompt names the `submit_analysis` tool."""
+        assert "submit_analysis" in SYSTEM_PROMPT
+
+    def test_contains_ticker(self):
+        """The prompt mentions 'ticker' in any letter case."""
+        lowered = SYSTEM_PROMPT.lower()
+        assert "ticker" in lowered
+
+    def test_is_substantial(self):
+        """The prompt holds at least 300 whitespace-separated words."""
+        words = SYSTEM_PROMPT.split()
+        word_count = len(words)
+        assert word_count >= 300, f"SYSTEM_PROMPT is only {word_count} words"
+
+    def test_mentions_conviction(self):
+        """The prompt mentions conviction scoring."""
+        lowered = SYSTEM_PROMPT.lower()
+        assert "conviction" in lowered
+
+    def test_mentions_time_horizon(self):
+        """The prompt refers to the time horizon, as a field name or in words."""
+        has_field_name = "time_horizon" in SYSTEM_PROMPT
+        has_phrase = "time horizon" in SYSTEM_PROMPT.lower()
+        assert has_field_name or has_phrase
+
+
+# ---------------------------------------------------------------------------
+# LlmCallResult dataclass
+# ---------------------------------------------------------------------------
+
+
+class TestLlmCallResult:
+    """Check the shape and immutability of LlmCallResult."""
+
+    def test_is_frozen(self):
+        """Reassigning a field on the frozen dataclass raises."""
+        frozen_result = LlmCallResult(
+            analysis=MeetKevinAnalysis(**_valid_analysis_input()),
+            raw_response={"stop_reason": "tool_use"},
+            prompt_tokens=5000,
+            completion_tokens=800,
+            cost_usd=Decimal("0.027"),
+        )
+        with pytest.raises((AttributeError, TypeError)):
+            setattr(frozen_result, "prompt_tokens", 9999)
+
+    def test_fields_accessible(self):
+        """Each of the five fields returns the value it was constructed with."""
+        parsed = MeetKevinAnalysis(**_valid_analysis_input())
+        expected_cost = Decimal("0.027")
+        expected_raw = {"stop_reason": "tool_use"}
+
+        built = LlmCallResult(
+            analysis=parsed,
+            raw_response={"stop_reason": "tool_use"},
+            prompt_tokens=5000,
+            completion_tokens=800,
+            cost_usd=expected_cost,
+        )
+
+        assert built.analysis is parsed
+        assert built.raw_response == expected_raw
+        assert built.prompt_tokens == 5000
+        assert built.completion_tokens == 800
+        assert built.cost_usd == expected_cost
+
+
+# ---------------------------------------------------------------------------
+# LlmAnalyzer.analyze — happy path
+# ---------------------------------------------------------------------------
+
+
+class TestLlmAnalyzerHappyPath:
+    """Happy-path tests for the analyzer."""
+
+    @staticmethod
+    async def _analyze(client, *, model="claude-sonnet-4-6", **overrides):
+        """Run LlmAnalyzer.analyze() with minimal inputs.
+
+        Keyword ``overrides`` replace the default analyze() arguments. The name
+        does not start with ``test`` so pytest does not collect it.
+        """
+        call_kwargs = {
+            "title": "Test",
+            "description": "",
+            "published_at": datetime(2026, 5, 21, tzinfo=timezone.utc),
+            "transcript_text": "",
+            "transcript_segments": [],
+        }
+        call_kwargs.update(overrides)
+        analyzer = LlmAnalyzer(client=client, model=model, prompt_version="v1")
+        return await analyzer.analyze(**call_kwargs)
+
+    @pytest.mark.asyncio
+    async def test_returns_llm_call_result(self):
+        """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
+        resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=5000, out_tokens=800)
+        client, _ = _make_client(resp)
+
+        result = await self._analyze(
+            client,
+            title="Market Update",
+            description="Kevin covers the latest market trends.",
+            published_at=datetime(2026, 5, 21, 12, 0, 0, tzinfo=timezone.utc),
+            transcript_text="Welcome to today's update. NVDA is looking strong.",
+            transcript_segments=[
+                {"start": 0.0, "end": 5.0, "text": "Welcome to today's update."},
+                {"start": 5.0, "end": 10.0, "text": "NVDA is looking strong."},
+            ],
+        )
+
+        assert isinstance(result, LlmCallResult)
+        assert isinstance(result.analysis, MeetKevinAnalysis)
+        assert result.prompt_tokens == 5000
+        assert result.completion_tokens == 800
+
+    @pytest.mark.asyncio
+    async def test_analysis_fields_parsed_correctly(self):
+        """Parsed MeetKevinAnalysis has correct field values from tool input."""
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, _ = _make_client(resp)
+
+        result = await self._analyze(
+            client,
+            title="Test Video",
+            description="Description",
+            transcript_text="Some transcript.",
+        )
+
+        analysis = result.analysis
+        assert analysis.market_outlook_direction == MarketOutlook.BULLISH
+        assert analysis.market_outlook_reasoning == "Strong earnings and low unemployment."
+        assert "Fed pivot" in analysis.macro_themes
+        assert len(analysis.tickers) == 1
+        assert analysis.tickers[0].symbol == "NVDA"
+        assert analysis.tickers[0].action == TickerAction.BUY
+        assert analysis.tickers[0].conviction == pytest.approx(0.85)
+        assert analysis.tickers[0].time_horizon == TimeHorizon.MONTHS
+
+    @pytest.mark.asyncio
+    async def test_cost_usd_is_positive(self):
+        """cost_usd is calculated and positive for a valid token count."""
+        resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
+        client, _ = _make_client(resp)
+
+        result = await self._analyze(client)
+
+        assert result.cost_usd > Decimal("0")
+
+    @pytest.mark.asyncio
+    async def test_api_called_with_tool_choice_forcing(self):
+        """messages.create is called with tool_choice forcing submit_analysis."""
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, mock_create = _make_client(resp)
+
+        await self._analyze(client)
+
+        mock_create.assert_called_once()
+        kwargs = mock_create.call_args.kwargs
+        assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}
+
+    @pytest.mark.asyncio
+    async def test_api_called_with_cache_control_on_system(self):
+        """System prompt is passed with cache_control: {type: ephemeral}."""
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, mock_create = _make_client(resp)
+
+        await self._analyze(client)
+
+        system = mock_create.call_args.kwargs["system"]
+        assert isinstance(system, list)
+        assert len(system) >= 1
+        assert system[0]["type"] == "text"
+        assert system[0]["cache_control"] == {"type": "ephemeral"}
+
+    @pytest.mark.asyncio
+    async def test_api_called_with_correct_model(self):
+        """messages.create is called with the model passed to LlmAnalyzer."""
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, mock_create = _make_client(resp)
+
+        await self._analyze(client, model="claude-opus-4-7")
+
+        assert mock_create.call_args.kwargs["model"] == "claude-opus-4-7"
+
+    @pytest.mark.asyncio
+    async def test_api_called_with_submit_analysis_tool(self):
+        """Tool definition includes name='submit_analysis'."""
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, mock_create = _make_client(resp)
+
+        await self._analyze(client)
+
+        tools = mock_create.call_args.kwargs["tools"]
+        assert any(t.get("name") == "submit_analysis" for t in tools)
+
+    @pytest.mark.asyncio
+    async def test_raw_response_is_captured(self):
+        """raw_response in LlmCallResult holds serializable dict."""
+        import json
+
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, _ = _make_client(resp)
+
+        result = await self._analyze(client)
+
+        assert isinstance(result.raw_response, dict)
+        # "Serializable" is part of the contract, so actually serialize it:
+        # json.dumps raises TypeError if a mock or other object leaked in.
+        json.dumps(result.raw_response)
+
+    @pytest.mark.asyncio
+    async def test_transcript_segments_included_in_user_message(self):
+        """User message contains timestamped segment lines from transcript_segments."""
+        resp = _make_anthropic_response(_valid_analysis_input())
+        client, mock_create = _make_client(resp)
+
+        segments = [
+            {"start": 0.0, "end": 5.0, "text": "Hello world."},
+            {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
+        ]
+
+        await self._analyze(
+            client,
+            transcript_text="Hello world. Let's talk stocks.",
+            transcript_segments=segments,
+        )
+
+        user_content = mock_create.call_args.kwargs["messages"][0]["content"]
+        # Assert on the "[<N>s] <text>" lines, not the bare sentences: the bare
+        # sentences also occur in transcript_text, so checking them alone would
+        # pass even if the segments were ignored.
+        assert "Transcript (with timestamps):" in user_content
+        assert "[0s] Hello world." in user_content
+        assert "[5s] Let's talk stocks." in user_content
+
+
+# ---------------------------------------------------------------------------
+# LlmAnalyzer.analyze — failure paths
+# ---------------------------------------------------------------------------
+
+
+class TestLlmAnalyzerFailurePaths:
+    """Failure path tests."""
+
+    @staticmethod
+    async def _analyze_minimal(client):
+        """Run analyze() with empty inputs against the given mocked client.
+
+        The name does not start with ``test`` so pytest does not collect it.
+        """
+        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        return await analyzer.analyze(
+            title="Test",
+            description="",
+            published_at=datetime(2026, 5, 21, tzinfo=timezone.utc),
+            transcript_text="",
+            transcript_segments=[],
+        )
+
+    @pytest.mark.asyncio
+    async def test_no_tool_use_block_raises_value_error(self):
+        """If response has no tool_use block, raises ValueError mentioning tool_use."""
+        # Response with a text block instead of tool_use
+        text_block = MagicMock()
+        text_block.type = "text"
+        text_block.text = "Here is my analysis..."
+
+        resp = MagicMock()
+        resp.content = [text_block]
+        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
+        resp.stop_reason = "end_turn"
+
+        client, _ = _make_client(resp)
+
+        with pytest.raises(ValueError, match="tool_use"):
+            await self._analyze_minimal(client)
+
+    @pytest.mark.asyncio
+    async def test_empty_content_raises_value_error(self):
+        """If response content is empty, raises ValueError mentioning tool_use."""
+        resp = MagicMock()
+        resp.content = []
+        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
+        resp.stop_reason = "tool_use"
+
+        client, _ = _make_client(resp)
+
+        with pytest.raises(ValueError, match="tool_use"):
+            await self._analyze_minimal(client)
+
+    @pytest.mark.asyncio
+    async def test_invalid_tool_input_raises_validation_error(self):
+        """Malformed tool input (invalid enum) raises a validation error."""
+        bad_input = _valid_analysis_input()
+        bad_input["market_outlook_direction"] = "extremely_bullish"  # not a valid enum
+
+        resp = _make_anthropic_response(bad_input)
+        client, _ = _make_client(resp)
+
+        # pydantic's ValidationError subclasses ValueError. Catching ValueError
+        # rather than bare Exception stops an unrelated failure (AttributeError,
+        # TypeError, ...) from making this test pass by accident.
+        with pytest.raises(ValueError):
+            await self._analyze_minimal(client)