feat(meet-kevin): Claude Sonnet 4.6 LLM analyzer (tool-use forcing + prompt cache)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-21 19:44:57 +00:00
parent 145f7dbec5
commit 8309556c00
2 changed files with 887 additions and 0 deletions

View file

@ -0,0 +1,427 @@
"""Claude LLM analyzer for Meet Kevin video transcripts.

Calls Claude Sonnet 4.6 with tool-use forcing to extract a structured
MeetKevinAnalysis from a video transcript. Uses prompt caching on the
system block to reduce cost across videos processed within the same
5-minute window.

Public API:
    SYSTEM_PROMPT       module-level analyst instructions
    compute_cost_usd()  Decimal-precise cost from token counts
    LlmCallResult       frozen dataclass returned by analyze()
    LlmAnalyzer         async class; .analyze() does the API call
"""
import logging
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from typing import Any
from anthropic import AsyncAnthropic
from shared.schemas.meet_kevin import MeetKevinAnalysis
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Pricing table (USD per 1 000 000 tokens: input, output)
# ---------------------------------------------------------------------------
# Each row is (model id, USD per 1M input tokens, USD per 1M output tokens).
# Prices are written as strings so the Decimal values are exact.
_PRICING: dict[str, tuple[Decimal, Decimal]] = {
    model_id: (Decimal(input_price), Decimal(output_price))
    for model_id, input_price, output_price in (
        ("claude-sonnet-4-6", "3", "15"),
        ("claude-opus-4-7", "15", "75"),
        ("claude-haiku-4-5-20251001", "1", "5"),
    )
}
# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------
# NOTE: numeric ranges are written with ASCII hyphens ("2-4") so they survive
# encoding-lossy tooling and match the wording used in the tool schema
# descriptions. A dropped dash turns "2-4 sentences" into "24 sentences",
# which silently changes what the model is asked to produce.
SYSTEM_PROMPT = """
You are a professional financial analyst specialising in retail investor sentiment.
Your task is to read the full transcript of a Meet Kevin (Kevin Paffrath) YouTube
video and extract a structured investment analysis from it.

## Your mission

Read the transcript carefully and produce a single, precise call to the
`submit_analysis` tool. Do **not** respond with prose — your entire output must be
that one tool call with all required fields filled in correctly.

## What to extract

### Market outlook

Identify the overall market direction Kevin is expressing: bullish, bearish, neutral,
or mixed. Write a concise `market_outlook_reasoning` (2-4 sentences) that explains
*why* you assigned that direction, grounded in specific statements from the video.

### Macro themes

List the 2-6 highest-level economic or policy themes Kevin discusses (e.g.
"Federal Reserve rate path", "AI capex cycle", "commercial real estate stress",
"dollar strength", "energy transition"). These should be phrase-length labels, not
full sentences.

### Key risks

List the 2-5 principal downside risks Kevin flags. Again, short phrase labels, not
paragraphs. Only include risks Kevin explicitly names or clearly implies — do not
invent risks he did not discuss.

### Summary

Write a ~200-word plain-English summary of the video's investment thesis. Focus on
actionable takeaways and any specific catalysts Kevin mentions. Avoid filler phrases
like "In this video Kevin discusses…" — start directly with the insight.

### Per-ticker mentions (tickers field)

Extract every stock, ETF, or crypto ticker that Kevin makes a substantive statement
about. For each one, fill in the following:

- **symbol** — The uppercase ticker symbol (e.g. "NVDA", "SPY", "BTC"). If Kevin
  mentions the company name but not the ticker, infer the ticker from the name (e.g.
  "Nvidia" -> "NVDA"). Max 6 characters. Only include tickers you are confident about.
- **action** — The clearest action signal you can infer from what Kevin says. Use
  exactly one of: `buy`, `sell`, `hold`, `watch`, `avoid`. If Kevin expresses
  interest but no clear directional view, use `watch`. If he says he is exiting or
  would not touch it, use `sell` or `avoid` respectively. Do not default to `hold`
  just because you are unsure — skip the ticker instead.
- **conviction** — A float between 0.0 and 1.0 representing how confident Kevin
  sounds. Use 0.8-1.0 for "I'm buying this aggressively / this is my top pick",
  0.5-0.7 for a clear directional view with some hedging, 0.2-0.4 for a tentative
  or heavily-caveated take. A ticker Kevin mentions only in passing (< 20 words of
  commentary) should be **skipped entirely** rather than assigned low conviction.
- **time_horizon** — Pick the closest match from: `intraday`, `days`, `weeks`,
  `months`, `long_term`, `unspecified`. If Kevin does not say, use `unspecified`.
- **rationale_quote** — A short verbatim or lightly paraphrased quote (20-80 words)
  from the transcript that best justifies the action you assigned. Include enough
  context to be meaningful on its own.
- **video_timestamp_seconds** — If the transcript includes segment timestamps (lines
  formatted as `[<N>s] <text>`), set this to the integer second where Kevin first
  makes the substantive statement about this ticker. If no timestamps are available,
  set to null.

## Rules for ticker inclusion

1. **Skip tickers mentioned only in passing.** Kevin often references tickers as
   examples or comparisons without making any recommendation. If he says fewer than
   ~20 words about a ticker with no clear directional signal, omit it from `tickers`.
2. **Do not duplicate tickers.** If Kevin mentions the same ticker multiple times,
   merge the signals into a single entry that represents his overall view from the
   video. Use the timestamp of the *first* substantive mention.
3. **Symbols only, no company names.** The `symbol` field must be a ticker, not a
   company name. "Nvidia" is wrong; "NVDA" is correct.
4. **Conviction scores are comparative.** Calibrate them relative to each other
   within the video — Kevin's "top conviction" pick in a video might be 0.85, while
   a hedged mention is 0.45.

## Quality checklist (review before calling submit_analysis)

- [ ] `market_outlook_direction` is one of: bullish, neutral, bearish, mixed
- [ ] `macro_themes` has 2-6 items, each a concise phrase
- [ ] `key_risks` has 2-5 items, each a concise phrase
- [ ] `summary` is approximately 200 words
- [ ] Every ticker in `tickers` has a clear actionable signal (no "I'm not sure")
- [ ] Tickers mentioned only in passing are omitted
- [ ] `conviction` values are floats in [0.0, 1.0]
- [ ] `time_horizon` is one of the six allowed values
- [ ] `rationale_quote` is grounded in something Kevin actually said
- [ ] You are calling `submit_analysis` exactly once with all required fields

Now read the transcript provided in the user message and call `submit_analysis`.
""".strip()
# ---------------------------------------------------------------------------
# Tool definition (JSON Schema mirroring MeetKevinAnalysis)
# ---------------------------------------------------------------------------
def _phrase_list_schema(description: str) -> dict[str, Any]:
    """Return the JSON Schema for an array of short string labels."""
    return {
        "type": "array",
        "items": {"type": "string"},
        "description": description,
    }


# Schema for one entry of the `tickers` array. Every property is required;
# `video_timestamp_seconds` may be null when no timestamp applies.
_TICKER_ITEM_SCHEMA: dict[str, Any] = {
    "type": "object",
    "required": [
        "symbol",
        "action",
        "conviction",
        "time_horizon",
        "rationale_quote",
        "video_timestamp_seconds",
    ],
    "properties": {
        "symbol": {
            "type": "string",
            "description": "Uppercase ticker symbol (1-6 chars)",
        },
        "action": {
            "type": "string",
            "enum": ["buy", "sell", "hold", "watch", "avoid"],
            "description": "Recommendation action",
        },
        "conviction": {
            "type": "number",
            "minimum": 0.0,
            "maximum": 1.0,
            "description": "Confidence in recommendation (0.0-1.0)",
        },
        "time_horizon": {
            "type": "string",
            "enum": [
                "intraday",
                "days",
                "weeks",
                "months",
                "long_term",
                "unspecified",
            ],
            "description": "Time horizon for the recommendation",
        },
        "rationale_quote": {
            "type": "string",
            "description": "Short verbatim or paraphrased quote from video",
        },
        "video_timestamp_seconds": {
            "type": ["integer", "null"],
            "description": "Timestamp in seconds for deep-link target",
        },
    },
}

# Top-level properties of the tool input, in the order they are sent.
_ANALYSIS_PROPERTIES: dict[str, Any] = {
    "market_outlook_direction": {
        "type": "string",
        "enum": ["bullish", "neutral", "bearish", "mixed"],
        "description": "Overall market sentiment direction",
    },
    "market_outlook_reasoning": {
        "type": "string",
        "description": "2-4 sentence explanation of the market outlook direction",
    },
    "macro_themes": _phrase_list_schema(
        "2-6 high-level macro economic themes discussed"
    ),
    "key_risks": _phrase_list_schema(
        "2-5 principal downside risks Kevin mentions"
    ),
    "summary": {
        "type": "string",
        "description": "~200-word plain-English investment thesis summary",
    },
    "tickers": {
        "type": "array",
        "description": "Per-ticker mentions with action and conviction",
        "items": _TICKER_ITEM_SCHEMA,
    },
}

# The single tool offered to the model; every top-level property is required.
_ANALYSIS_TOOL: dict[str, Any] = {
    "name": "submit_analysis",
    "description": (
        "Submit a structured analysis of a Meet Kevin video transcript. "
        "Call this exactly once with all fields filled in."
    ),
    "input_schema": {
        "type": "object",
        "required": list(_ANALYSIS_PROPERTIES),
        "properties": _ANALYSIS_PROPERTIES,
    },
}
# ---------------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------------
# Prompt-cache billing multipliers, relative to the model's base input price.
# These apply to the default 5-minute ("ephemeral") cache TTL: writing to the
# cache costs 1.25x base input, reading from it costs 0.1x.
_CACHE_WRITE_MULTIPLIER = Decimal("1.25")
_CACHE_READ_MULTIPLIER = Decimal("0.1")
_TOKENS_PER_MILLION = Decimal("1000000")


def compute_cost_usd(
    model: str,
    input_tokens: int,
    output_tokens: int,
    *,
    cache_creation_tokens: int = 0,
    cache_read_tokens: int = 0,
) -> Decimal:
    """Compute LLM call cost in USD using pinned per-model pricing.

    The API reports cache writes and cache reads separately from ordinary
    input tokens. Pass them via the keyword-only arguments so a call that
    uses prompt caching is not under-priced. Both default to 0, which gives
    the same result as pricing input and output tokens alone.

    Args:
        model: Model identifier string (must be a key in _PRICING).
        input_tokens: Number of uncached input/prompt tokens consumed.
        output_tokens: Number of output/completion tokens generated.
        cache_creation_tokens: Input tokens written to the prompt cache
            (billed at 1.25x the base input price).
        cache_read_tokens: Input tokens served from the prompt cache
            (billed at 0.1x the base input price).

    Returns:
        Cost as a Decimal quantized to four decimal places. Returns
        Decimal("0") for unknown models (logs a warning).
    """
    pricing = _PRICING.get(model)
    if pricing is None:
        logger.warning("compute_cost_usd: unknown model %r — returning zero cost", model)
        return Decimal("0")

    price_per_m_input, price_per_m_output = pricing

    # Fold cache traffic into an "equivalent uncached input tokens" figure so
    # a single multiplication by the input price covers all three buckets.
    billable_input_tokens = (
        Decimal(input_tokens)
        + Decimal(cache_creation_tokens) * _CACHE_WRITE_MULTIPLIER
        + Decimal(cache_read_tokens) * _CACHE_READ_MULTIPLIER
    )
    cost = (
        billable_input_tokens * price_per_m_input
        + Decimal(output_tokens) * price_per_m_output
    ) / _TOKENS_PER_MILLION
    return cost.quantize(Decimal("0.0001"))
# ---------------------------------------------------------------------------
# Result dataclass
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class LlmCallResult:
    """Immutable result of one LLM analyze() call.

    The dataclass is frozen, so fields cannot be reassigned after
    construction.
    """

    # Structured analysis validated from the model's tool-call input.
    analysis: MeetKevinAnalysis
    # Plain-dict snapshot of the response kept for auditing; as built by
    # LlmAnalyzer.analyze() it holds stop_reason, tool name/input and usage.
    raw_response: dict
    # Input token count reported by the API for this call.
    prompt_tokens: int
    # Output token count reported by the API for this call.
    completion_tokens: int
    # Cost of the call in USD, as computed by compute_cost_usd().
    cost_usd: Decimal
# ---------------------------------------------------------------------------
# Analyzer class
# ---------------------------------------------------------------------------
# Upper bound on timestamped segments sent to the model in one request.
_MAX_SEGMENTS = 1000
# Default output-token budget for the forced tool call.
_DEFAULT_MAX_TOKENS = 4096


class LlmAnalyzer:
    """Calls Claude to extract structured analysis from a video transcript.

    Args:
        client: Configured AsyncAnthropic client.
        model: Model identifier (e.g. "claude-sonnet-4-6").
        prompt_version: Prompt version string stored in kevin_analyses.
        max_tokens: Output-token budget for the tool call (keyword-only,
            defaults to 4096).
    """

    def __init__(
        self,
        client: AsyncAnthropic,
        model: str,
        prompt_version: str,
        *,
        max_tokens: int = _DEFAULT_MAX_TOKENS,
    ) -> None:
        self._client = client
        self._model = model
        self._prompt_version = prompt_version
        self._max_tokens = max_tokens

    async def analyze(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict],
    ) -> LlmCallResult:
        """Run Claude analysis on a transcript and return a structured result.

        Args:
            title: Video title.
            description: Video description (may be empty).
            published_at: Publication timestamp. Naive values are taken to be
                UTC; timezone-aware values are converted to UTC.
            transcript_text: Full concatenated transcript text.
            transcript_segments: List of {start, end, text} dicts.

        Returns:
            LlmCallResult with parsed MeetKevinAnalysis and token accounting.

        Raises:
            ValueError: If the response contains no tool_use block.
            pydantic.ValidationError: If tool_use input fails schema validation.
        """
        user_msg = self._build_user_message(
            title=title,
            description=description,
            published_at=published_at,
            transcript_text=transcript_text,
            transcript_segments=transcript_segments,
        )
        response = await self._client.messages.create(
            model=self._model,
            max_tokens=self._max_tokens,
            system=[
                {
                    "type": "text",
                    "text": SYSTEM_PROMPT,
                    # Marks the system block as a prompt-cache breakpoint.
                    "cache_control": {"type": "ephemeral"},
                }
            ],
            tools=[_ANALYSIS_TOOL],
            # Force the model to answer via the tool rather than prose.
            tool_choice={"type": "tool", "name": "submit_analysis"},
            messages=[{"role": "user", "content": user_msg}],
        )

        # Find the first tool_use block
        tool_block = next(
            (b for b in response.content if b.type == "tool_use"),
            None,
        )
        if tool_block is None:
            raise ValueError(
                f"Claude response contained no tool_use block "
                f"(stop_reason={response.stop_reason!r})"
            )

        if response.stop_reason == "max_tokens":
            # A truncated tool call usually fails validation below; log first
            # so the validation error can be traced back to its cause.
            logger.warning(
                "analyze: response hit max_tokens=%s; tool input may be truncated",
                self._max_tokens,
            )

        analysis = MeetKevinAnalysis.model_validate(tool_block.input)

        usage = response.usage
        prompt_tokens: int = usage.input_tokens
        completion_tokens: int = usage.output_tokens
        # NOTE: cost_usd covers only the tokens reported as input/output.
        # Prompt-cache writes/reads are reported in separate usage fields and
        # are recorded in raw_response so they can be priced by the caller.
        cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)
        raw_response: dict = {
            "stop_reason": response.stop_reason,
            "tool_name": tool_block.name,
            "tool_input": tool_block.input,
            "usage": {
                "input_tokens": prompt_tokens,
                "output_tokens": completion_tokens,
                "cache_creation_input_tokens": self._usage_count(
                    usage, "cache_creation_input_tokens"
                ),
                "cache_read_input_tokens": self._usage_count(
                    usage, "cache_read_input_tokens"
                ),
            },
        }
        return LlmCallResult(
            analysis=analysis,
            raw_response=raw_response,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cost_usd=cost_usd,
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _usage_count(usage: Any, field: str) -> int:
        """Return an optional integer usage field, or 0 if absent/non-integer."""
        value = getattr(usage, field, None)
        if isinstance(value, int) and not isinstance(value, bool):
            return value
        return 0

    @staticmethod
    def _format_published(published_at: datetime) -> str:
        """Format the publication time as 'YYYY-MM-DD HH:MM UTC'."""
        # Local import: the module only imports the `datetime` class.
        from datetime import timezone

        # Aware datetimes are converted so the "UTC" label is truthful;
        # naive ones are assumed to already be UTC and pass through as-is.
        if published_at.utcoffset() is not None:
            published_at = published_at.astimezone(timezone.utc)
        return published_at.strftime("%Y-%m-%d %H:%M UTC")

    @staticmethod
    def _format_segments(transcript_segments: list[dict]) -> list[str]:
        """Render up to _MAX_SEGMENTS segments as '[<N>s] <text>' lines.

        Segments whose text is missing or blank are skipped, and a missing
        start time is treated as 0.
        """
        lines: list[str] = []
        for seg in transcript_segments[:_MAX_SEGMENTS]:
            text = (seg.get("text") or "").strip()
            if not text:
                continue
            start_seconds = int(float(seg.get("start") or 0))
            lines.append(f"[{start_seconds}s] {text}")
        return lines

    def _build_user_message(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict],
    ) -> str:
        """Build the user-turn message for the API call.

        Timestamped segments are preferred. If none of them carries usable
        text, the plain transcript text is used instead; if that is empty
        too, a placeholder line is emitted.
        """
        parts: list[str] = [
            f"Title: {title}",
            f"Published: {self._format_published(published_at)}",
        ]
        if description:
            parts.append(f"Description: {description}")
        parts.append("")  # blank line before transcript

        segment_count = len(transcript_segments) if transcript_segments else 0
        if segment_count > _MAX_SEGMENTS:
            # Make the truncation visible instead of silently dropping the
            # tail of long videos.
            logger.warning(
                "_build_user_message: transcript has %d segments; only the first %d are sent",
                segment_count,
                _MAX_SEGMENTS,
            )

        segment_lines = (
            self._format_segments(transcript_segments) if transcript_segments else []
        )
        if segment_lines:
            # Prefer timestamped segments (up to _MAX_SEGMENTS)
            parts.append("Transcript (with timestamps):")
            parts.extend(segment_lines)
        elif transcript_text:
            parts.append("Transcript:")
            parts.append(transcript_text)
        else:
            parts.append("Transcript: (no transcript available)")
        return "\n".join(parts)

View file

@ -0,0 +1,460 @@
"""Tests for the Claude LLM analyzer (Task 7).
Tests use MagicMock/AsyncMock to avoid real API calls.
"""
from datetime import datetime, timezone
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock
import pytest
from services.meet_kevin_watcher.llm_analyzer import (
SYSTEM_PROMPT,
LlmAnalyzer,
LlmCallResult,
compute_cost_usd,
)
from shared.schemas.meet_kevin import (
MarketOutlook,
MeetKevinAnalysis,
TickerAction,
TimeHorizon,
)
# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------
def _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800):
"""Build a minimal mock of an Anthropic messages.create response."""
block = MagicMock()
block.type = "tool_use"
block.name = "submit_analysis"
block.input = tool_input
resp = MagicMock()
resp.content = [block]
resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
resp.stop_reason = "tool_use"
return resp
def _valid_analysis_input() -> dict:
"""Return a dict that Pydantic can validate into MeetKevinAnalysis."""
return {
"market_outlook_direction": "bullish",
"market_outlook_reasoning": "Strong earnings and low unemployment.",
"macro_themes": ["Fed pivot", "AI boom"],
"key_risks": ["Inflation rebound", "Credit crunch"],
"summary": "Kevin discussed the current bull market and highlighted several tech stocks.",
"tickers": [
{
"symbol": "NVDA",
"action": "buy",
"conviction": 0.85,
"time_horizon": "months",
"rationale_quote": "AI infrastructure buildout has years to run",
"video_timestamp_seconds": 320,
}
],
}
def _make_client(response=None):
"""Return a mocked AsyncAnthropic client with messages.create wired up."""
mock_create = AsyncMock(return_value=response)
mock_messages = MagicMock()
mock_messages.create = mock_create
client = MagicMock()
client.messages = mock_messages
return client, mock_create
# ---------------------------------------------------------------------------
# compute_cost_usd
# ---------------------------------------------------------------------------
class TestComputeCostUsd:
    """Cost calculations must be exact Decimal arithmetic on pinned prices."""

    def test_sonnet_46_pricing(self):
        """claude-sonnet-4-6: $3/M input + $15/M output."""
        # One million tokens each way: $3 + $15 = $18.
        million = 1_000_000
        assert compute_cost_usd("claude-sonnet-4-6", million, million) == Decimal("18.0000")

    def test_opus_47_pricing(self):
        """claude-opus-4-7: $15/M input + $75/M output."""
        million = 1_000_000
        assert compute_cost_usd("claude-opus-4-7", million, million) == Decimal("90.0000")

    def test_haiku_45_pricing(self):
        """claude-haiku-4-5-20251001: $1/M input + $5/M output."""
        million = 1_000_000
        cost = compute_cost_usd("claude-haiku-4-5-20251001", million, million)
        assert cost == Decimal("6.0000")

    def test_unknown_model_returns_zero(self):
        """A model missing from the pricing table costs Decimal('0')."""
        assert compute_cost_usd("unknown-model", 1000, 1000) == Decimal("0")

    def test_zero_tokens(self):
        """No tokens means no cost."""
        assert compute_cost_usd("claude-sonnet-4-6", 0, 0) == Decimal("0")

    def test_result_is_decimal(self):
        """The return value is a Decimal, never a float."""
        cost = compute_cost_usd("claude-sonnet-4-6", 5000, 800)
        assert isinstance(cost, Decimal)

    def test_small_realistic_call(self):
        """Realistic 10K input + 1K output token call (Sonnet 4.6)."""
        # input:  10_000 / 1_000_000 * 3  = 0.0300
        # output:  1_000 / 1_000_000 * 15 = 0.0150
        # total:                            0.0450
        cost = compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000)
        assert cost == Decimal("0.0450")
# ---------------------------------------------------------------------------
# SYSTEM_PROMPT
# ---------------------------------------------------------------------------
class TestSystemPrompt:
    """The system prompt must carry the markers the analyzer relies on."""

    def test_contains_submit_analysis(self):
        """The tool name 'submit_analysis' appears in the prompt."""
        assert SYSTEM_PROMPT.find("submit_analysis") != -1

    def test_contains_ticker(self):
        """The prompt mentions 'ticker' in any letter case."""
        lowered = SYSTEM_PROMPT.lower()
        assert "ticker" in lowered

    def test_is_substantial(self):
        """The prompt holds at least 300 whitespace-separated words."""
        words = SYSTEM_PROMPT.split()
        word_count = len(words)
        assert word_count >= 300, f"SYSTEM_PROMPT is only {word_count} words"

    def test_mentions_conviction(self):
        """Conviction scoring is described somewhere in the prompt."""
        lowered = SYSTEM_PROMPT.lower()
        assert "conviction" in lowered

    def test_mentions_time_horizon(self):
        """The time_horizon field is described, by field name or in words."""
        by_field_name = "time_horizon" in SYSTEM_PROMPT
        in_words = "time horizon" in SYSTEM_PROMPT.lower()
        assert by_field_name or in_words
# ---------------------------------------------------------------------------
# LlmCallResult dataclass
# ---------------------------------------------------------------------------
class TestLlmCallResult:
    """LlmCallResult exposes five fields and rejects mutation."""

    @staticmethod
    def _result_for(analysis, cost):
        """Build an LlmCallResult with the fixed token counts used below."""
        return LlmCallResult(
            analysis=analysis,
            raw_response={"stop_reason": "tool_use"},
            prompt_tokens=5000,
            completion_tokens=800,
            cost_usd=cost,
        )

    def test_is_frozen(self):
        """Assigning to a field of a constructed result raises."""
        parsed = MeetKevinAnalysis(**_valid_analysis_input())
        frozen = self._result_for(parsed, Decimal("0.027"))
        with pytest.raises((AttributeError, TypeError)):
            frozen.prompt_tokens = 9999  # type: ignore

    def test_fields_accessible(self):
        """Each of the five fields returns what was passed in."""
        parsed = MeetKevinAnalysis(**_valid_analysis_input())
        expected_cost = Decimal("0.027")
        built = self._result_for(parsed, expected_cost)
        assert built.analysis is parsed
        assert built.raw_response == {"stop_reason": "tool_use"}
        assert built.prompt_tokens == 5000
        assert built.completion_tokens == 800
        assert built.cost_usd == expected_cost
# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — happy path
# ---------------------------------------------------------------------------
class TestLlmAnalyzerHappyPath:
    """Happy-path tests for the analyzer."""

    @staticmethod
    async def _analyze(response, *, model="claude-sonnet-4-6", **overrides):
        """Run analyze() against a mocked client.

        Returns ``(result, create_mock)``. The defaults are the minimal call
        most tests use; ``overrides`` replaces individual analyze() keyword
        arguments.
        """
        client, create_mock = _make_client(response)
        analyzer = LlmAnalyzer(client=client, model=model, prompt_version="v1")
        call_kwargs = {
            "title": "Test",
            "description": "",
            "published_at": datetime(2026, 5, 21, tzinfo=timezone.utc),
            "transcript_text": "",
            "transcript_segments": [],
        }
        call_kwargs.update(overrides)
        outcome = await analyzer.analyze(**call_kwargs)
        return outcome, create_mock

    @pytest.mark.asyncio
    async def test_returns_llm_call_result(self):
        """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
        canned = _make_anthropic_response(
            _valid_analysis_input(), in_tokens=5000, out_tokens=800
        )
        outcome, _ = await self._analyze(
            canned,
            title="Market Update",
            description="Kevin covers the latest market trends.",
            published_at=datetime(2026, 5, 21, 12, 0, 0, tzinfo=timezone.utc),
            transcript_text="Welcome to today's update. NVDA is looking strong.",
            transcript_segments=[
                {"start": 0.0, "end": 5.0, "text": "Welcome to today's update."},
                {"start": 5.0, "end": 10.0, "text": "NVDA is looking strong."},
            ],
        )
        assert isinstance(outcome, LlmCallResult)
        assert isinstance(outcome.analysis, MeetKevinAnalysis)
        assert outcome.prompt_tokens == 5000
        assert outcome.completion_tokens == 800

    @pytest.mark.asyncio
    async def test_analysis_fields_parsed_correctly(self):
        """Parsed MeetKevinAnalysis has correct field values from tool input."""
        canned = _make_anthropic_response(_valid_analysis_input())
        outcome, _ = await self._analyze(
            canned,
            title="Test Video",
            description="Description",
            transcript_text="Some transcript.",
        )
        parsed = outcome.analysis
        assert parsed.market_outlook_direction == MarketOutlook.BULLISH
        assert parsed.market_outlook_reasoning == "Strong earnings and low unemployment."
        assert "Fed pivot" in parsed.macro_themes
        assert len(parsed.tickers) == 1
        only_ticker = parsed.tickers[0]
        assert only_ticker.symbol == "NVDA"
        assert only_ticker.action == TickerAction.BUY
        assert only_ticker.conviction == pytest.approx(0.85)
        assert only_ticker.time_horizon == TimeHorizon.MONTHS

    @pytest.mark.asyncio
    async def test_cost_usd_is_positive(self):
        """cost_usd is calculated and positive for a valid token count."""
        canned = _make_anthropic_response(
            _valid_analysis_input(), in_tokens=10_000, out_tokens=1_000
        )
        outcome, _ = await self._analyze(canned)
        assert outcome.cost_usd > Decimal("0")

    @pytest.mark.asyncio
    async def test_api_called_with_tool_choice_forcing(self):
        """messages.create is called with tool_choice forcing submit_analysis."""
        canned = _make_anthropic_response(_valid_analysis_input())
        _, create_mock = await self._analyze(canned)
        create_mock.assert_called_once()
        sent = create_mock.call_args.kwargs
        assert sent["tool_choice"] == {"type": "tool", "name": "submit_analysis"}

    @pytest.mark.asyncio
    async def test_api_called_with_cache_control_on_system(self):
        """System prompt is passed with cache_control: {type: ephemeral}."""
        canned = _make_anthropic_response(_valid_analysis_input())
        _, create_mock = await self._analyze(canned)
        system_blocks = create_mock.call_args.kwargs["system"]
        assert isinstance(system_blocks, list)
        assert len(system_blocks) >= 1
        first_block = system_blocks[0]
        assert first_block["type"] == "text"
        assert first_block["cache_control"] == {"type": "ephemeral"}

    @pytest.mark.asyncio
    async def test_api_called_with_correct_model(self):
        """messages.create is called with the model passed to LlmAnalyzer."""
        canned = _make_anthropic_response(_valid_analysis_input())
        _, create_mock = await self._analyze(canned, model="claude-opus-4-7")
        assert create_mock.call_args.kwargs["model"] == "claude-opus-4-7"

    @pytest.mark.asyncio
    async def test_api_called_with_submit_analysis_tool(self):
        """Tool definition includes name='submit_analysis'."""
        canned = _make_anthropic_response(_valid_analysis_input())
        _, create_mock = await self._analyze(canned)
        offered_tools = create_mock.call_args.kwargs["tools"]
        assert any(tool.get("name") == "submit_analysis" for tool in offered_tools)

    @pytest.mark.asyncio
    async def test_raw_response_is_captured(self):
        """raw_response in LlmCallResult holds serializable dict."""
        canned = _make_anthropic_response(_valid_analysis_input())
        outcome, _ = await self._analyze(canned)
        assert isinstance(outcome.raw_response, dict)

    @pytest.mark.asyncio
    async def test_transcript_segments_included_in_user_message(self):
        """User message contains timestamped segment lines from transcript_segments."""
        canned = _make_anthropic_response(_valid_analysis_input())
        _, create_mock = await self._analyze(
            canned,
            transcript_text="Hello world. Let's talk stocks.",
            transcript_segments=[
                {"start": 0.0, "end": 5.0, "text": "Hello world."},
                {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
            ],
        )
        user_content = create_mock.call_args.kwargs["messages"][0]["content"]
        # Both segment texts must reach the model.
        assert "Hello world." in user_content
        assert "Let's talk stocks." in user_content
# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — failure paths
# ---------------------------------------------------------------------------
class TestLlmAnalyzerFailurePaths:
    """Failure path tests."""

    @staticmethod
    async def _analyze_minimal(response):
        """Call analyze() with minimal arguments against a mocked client."""
        client, _ = _make_client(response)
        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
        return await analyzer.analyze(
            title="Test",
            description="",
            published_at=datetime(2026, 5, 21, tzinfo=timezone.utc),
            transcript_text="",
            transcript_segments=[],
        )

    @pytest.mark.asyncio
    async def test_no_tool_use_block_raises_value_error(self):
        """If response has no tool_use block, raises ValueError mentioning tool_use."""
        # Response with a text block instead of tool_use
        text_block = MagicMock()
        text_block.type = "text"
        text_block.text = "Here is my analysis..."
        resp = MagicMock()
        resp.content = [text_block]
        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
        resp.stop_reason = "end_turn"
        with pytest.raises(ValueError, match="tool_use"):
            await self._analyze_minimal(resp)

    @pytest.mark.asyncio
    async def test_empty_content_raises_value_error(self):
        """If response content is empty, raises ValueError."""
        resp = MagicMock()
        resp.content = []
        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
        resp.stop_reason = "tool_use"
        with pytest.raises(ValueError):
            await self._analyze_minimal(resp)

    @pytest.mark.asyncio
    async def test_invalid_tool_input_raises_validation_error(self):
        """Malformed tool input (invalid enum) raises a validation error."""
        bad_input = _valid_analysis_input()
        bad_input["market_outlook_direction"] = "extremely_bullish"  # not a valid enum
        resp = _make_anthropic_response(bad_input)
        # pydantic's ValidationError subclasses ValueError. Catching ValueError
        # rather than Exception keeps an unrelated AttributeError or TypeError
        # in the analyzer from passing this test.
        with pytest.raises(ValueError):
            await self._analyze_minimal(resp)