refactor(meet-kevin): switch LLM analyzer to OpenRouter via OpenAI SDK

The user's Vault has an openrouter_api_key but no direct Anthropic
(sk-ant-*) key. OpenRouter passes Claude Sonnet through (slug
anthropic/claude-sonnet-4.5; ~3% markup over Anthropic list pricing)
and matches the existing gpt_mini_endpoint pattern used by
recruiter-responder.

- Replace anthropic.AsyncAnthropic with openai.AsyncOpenAI + base_url
- Convert Anthropic tool-use API to OpenAI function-calling
- System prompt unchanged (analyst instructions are model-agnostic)
- Drop cache_control (not in OpenAI API); revisit later if cost matters
- Model slug: anthropic/claude-sonnet-4.5 (OpenRouter's current Claude tier)
- Pricing: $3.10/M input, $15.50/M output (OpenRouter pass-through)
- Config field anthropic_api_key -> openrouter_api_key
- pyproject extras: anthropic>=0.40 -> openai>=1.50

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-22 09:52:55 +00:00
parent 3c20c8c12c
commit 89f01ad9c0
5 changed files with 244 additions and 216 deletions

View file

@ -20,7 +20,7 @@ news = ["feedparser>=6.0", "praw>=7.7", "asyncpraw>=7.7", "httpx>=0.27"]
sentiment = ["transformers>=4.38", "torch>=2.2", "ollama>=0.1"]
trading = ["alpaca-py>=0.21", "pytz>=2024.1", "yfinance>=0.2", "httpx>=0.27"]
backtester = ["numpy>=1.26", "pandas>=2.2"]
meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "anthropic>=0.40", "httpx>=0.27"]
meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "openai>=1.50", "httpx>=0.27"]
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=4.1", "ruff>=0.3", "mypy>=1.8", "httpx>=0.27"]
[build-system]

View file

@ -18,12 +18,12 @@ class MeetKevinWatcherConfig(BaseConfig):
# LLM analysis settings
meet_kevin_max_llm_retries: int = 3
meet_kevin_llm_model: str = "claude-sonnet-4-6"
meet_kevin_llm_model: str = "anthropic/claude-sonnet-4.5"
meet_kevin_prompt_version: str = "v1"
meet_kevin_daily_cost_cap_usd: float = 5.0
# API credentials
anthropic_api_key: str = ""
openrouter_api_key: str = ""
# Runtime settings
meet_kevin_workdir: str = "/tmp/meet_kevin_captions"

View file

@ -1,9 +1,7 @@
"""Claude LLM analyzer for Meet Kevin video transcripts.
"""OpenRouter LLM analyzer for Meet Kevin video transcripts.
Calls Claude Sonnet 4.6 with tool-use forcing to extract structured
MeetKevinAnalysis from a video transcript. Uses prompt caching on the
system block to reduce cost across videos processed within the same
5-minute window.
Calls Claude Sonnet (via OpenRouter) with function-calling forcing to extract
structured MeetKevinAnalysis from a video transcript.
Public API:
SYSTEM_PROMPT module-level analyst instructions
@ -12,13 +10,14 @@ Public API:
LlmAnalyzer async class; .analyze() does the API call
"""
import json
import logging
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from typing import Any
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI
from shared.schemas.meet_kevin import MeetKevinAnalysis
@ -26,12 +25,16 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Pricing table (USD per 1 000 000 tokens: input, output)
# Sonnet entries use OpenRouter pass-through pricing (~3% markup over Anthropic list);
# the Opus and Haiku entries do not include that markup.
# ---------------------------------------------------------------------------
_PRICING: dict[str, tuple[Decimal, Decimal]] = {
"claude-sonnet-4-6": (Decimal("3"), Decimal("15")),
"claude-sonnet-4-6": (Decimal("3.10"), Decimal("15.50")),
"claude-opus-4-7": (Decimal("15"), Decimal("75")),
"claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
# OpenRouter model slugs
"anthropic/claude-sonnet-4.5": (Decimal("3.10"), Decimal("15.50")),
"anthropic/claude-sonnet-4.6": (Decimal("3.10"), Decimal("15.50")),
}
# ---------------------------------------------------------------------------
@ -138,97 +141,99 @@ Now read the transcript provided in the user message and call `submit_analysis`.
""".strip()
# ---------------------------------------------------------------------------
# Tool definition (JSON Schema mirroring MeetKevinAnalysis)
# Tool definition (OpenAI function-calling format)
# ---------------------------------------------------------------------------
_ANALYSIS_TOOL: dict[str, Any] = {
"name": "submit_analysis",
"description": (
"Submit a structured analysis of a Meet Kevin video transcript. "
"Call this exactly once with all fields filled in."
),
"input_schema": {
"type": "object",
"required": [
"market_outlook_direction",
"market_outlook_reasoning",
"macro_themes",
"key_risks",
"summary",
"tickers",
],
"properties": {
"market_outlook_direction": {
"type": "string",
"enum": ["bullish", "neutral", "bearish", "mixed"],
"description": "Overall market sentiment direction",
},
"market_outlook_reasoning": {
"type": "string",
"description": "2-4 sentence explanation of the market outlook direction",
},
"macro_themes": {
"type": "array",
"items": {"type": "string"},
"description": "2-6 high-level macro economic themes discussed",
},
"key_risks": {
"type": "array",
"items": {"type": "string"},
"description": "2-5 principal downside risks Kevin mentions",
},
"summary": {
"type": "string",
"description": "~200-word plain-English investment thesis summary",
},
"tickers": {
"type": "array",
"description": "Per-ticker mentions with action and conviction",
"items": {
"type": "object",
"required": [
"symbol",
"action",
"conviction",
"time_horizon",
"rationale_quote",
"video_timestamp_seconds",
],
"properties": {
"symbol": {
"type": "string",
"description": "Uppercase ticker symbol (1-6 chars)",
},
"action": {
"type": "string",
"enum": ["buy", "sell", "hold", "watch", "avoid"],
"description": "Recommendation action",
},
"conviction": {
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"description": "Confidence in recommendation (0.0-1.0)",
},
"time_horizon": {
"type": "string",
"enum": [
"intraday",
"days",
"weeks",
"months",
"long_term",
"unspecified",
],
"description": "Time horizon for the recommendation",
},
"rationale_quote": {
"type": "string",
"description": "Short verbatim or paraphrased quote from video",
},
"video_timestamp_seconds": {
"type": ["integer", "null"],
"description": "Timestamp in seconds for deep-link target",
_ANALYSIS_TOOL_OPENAI: dict[str, Any] = {
"type": "function",
"function": {
"name": "submit_analysis",
"description": (
"Submit the structured analysis of one Meet Kevin video. Call this exactly once."
),
"parameters": {
"type": "object",
"required": [
"market_outlook_direction",
"market_outlook_reasoning",
"macro_themes",
"key_risks",
"summary",
"tickers",
],
"properties": {
"market_outlook_direction": {
"type": "string",
"enum": ["bullish", "neutral", "bearish", "mixed"],
"description": "Overall market sentiment direction",
},
"market_outlook_reasoning": {
"type": "string",
"description": "2-4 sentence explanation of the market outlook direction",
},
"macro_themes": {
"type": "array",
"items": {"type": "string"},
"description": "2-6 high-level macro economic themes discussed",
},
"key_risks": {
"type": "array",
"items": {"type": "string"},
"description": "2-5 principal downside risks Kevin mentions",
},
"summary": {
"type": "string",
"description": "~200-word plain-English investment thesis summary",
},
"tickers": {
"type": "array",
"description": "Per-ticker mentions with action and conviction",
"items": {
"type": "object",
"required": [
"symbol",
"action",
"conviction",
"time_horizon",
"rationale_quote",
"video_timestamp_seconds",
],
"properties": {
"symbol": {
"type": "string",
"description": "Uppercase ticker symbol (1-6 chars)",
},
"action": {
"type": "string",
"enum": ["buy", "sell", "hold", "watch", "avoid"],
"description": "Recommendation action",
},
"conviction": {
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"description": "Confidence in recommendation (0.0-1.0)",
},
"time_horizon": {
"type": "string",
"enum": [
"intraday",
"days",
"weeks",
"months",
"long_term",
"unspecified",
],
"description": "Time horizon for the recommendation",
},
"rationale_quote": {
"type": "string",
"description": "Short verbatim or paraphrased quote from video",
},
"video_timestamp_seconds": {
"type": ["integer", "null"],
"description": "Timestamp in seconds for deep-link target",
},
},
},
},
@ -291,15 +296,15 @@ _MAX_SEGMENTS = 1000
class LlmAnalyzer:
"""Calls Claude to extract structured analysis from a video transcript.
"""Calls Claude (via OpenRouter) to extract structured analysis from a video transcript.
Args:
client: Configured AsyncAnthropic client.
model: Model identifier (e.g. "claude-sonnet-4-6").
client: Configured AsyncOpenAI client pointed at OpenRouter.
model: Model identifier (e.g. "anthropic/claude-sonnet-4.5").
prompt_version: Prompt version string stored in kevin_analyses.
"""
def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None:
def __init__(self, client: AsyncOpenAI, model: str, prompt_version: str) -> None:
self._client = client
self._model = model
self._prompt_version = prompt_version
@ -313,7 +318,7 @@ class LlmAnalyzer:
transcript_text: str,
transcript_segments: list[dict],
) -> LlmCallResult:
"""Run Claude analysis on a transcript and return a structured result.
"""Run LLM analysis on a transcript and return a structured result.
Args:
title: Video title.
@ -326,8 +331,8 @@ class LlmAnalyzer:
LlmCallResult with parsed MeetKevinAnalysis and token accounting.
Raises:
ValueError: If the response contains no tool_use block.
pydantic.ValidationError: If tool_use input fails schema validation.
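ValueError: If the response contains no tool_calls. A json.JSONDecodeError
(a ValueError subclass) is raised if the function arguments are not valid JSON.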
pydantic.ValidationError: If function arguments fail schema validation.
"""
user_msg = self._build_user_message(
title=title,
@ -337,42 +342,35 @@ class LlmAnalyzer:
transcript_segments=transcript_segments,
)
response = await self._client.messages.create(
response = await self._client.chat.completions.create(
model=self._model,
max_tokens=4096,
system=[
{
"type": "text",
"text": SYSTEM_PROMPT,
"cache_control": {"type": "ephemeral"},
}
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_msg},
],
tools=[_ANALYSIS_TOOL],
tool_choice={"type": "tool", "name": "submit_analysis"},
messages=[{"role": "user", "content": user_msg}],
tools=[_ANALYSIS_TOOL_OPENAI],
tool_choice={"type": "function", "function": {"name": "submit_analysis"}},
)
# Find the first tool_use block
tool_block = next(
(b for b in response.content if b.type == "tool_use"),
None,
)
if tool_block is None:
message = response.choices[0].message
if not message.tool_calls:
raise ValueError(
f"Claude response contained no tool_use block "
f"(stop_reason={response.stop_reason!r})"
"LLM response contained no tool_calls (expected submit_analysis function call)"
)
analysis = MeetKevinAnalysis.model_validate(tool_block.input)
tool_call = message.tool_calls[0]
tool_input = json.loads(tool_call.function.arguments)
analysis = MeetKevinAnalysis.model_validate(tool_input)
prompt_tokens: int = response.usage.input_tokens
completion_tokens: int = response.usage.output_tokens
prompt_tokens: int = response.usage.prompt_tokens
completion_tokens: int = response.usage.completion_tokens
cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)
raw_response: dict = {
"stop_reason": response.stop_reason,
"tool_name": tool_block.name,
"tool_input": tool_block.input,
"finish_reason": response.choices[0].finish_reason,
"tool_name": tool_call.function.name,
"tool_input": tool_input,
"usage": {
"input_tokens": prompt_tokens,
"output_tokens": completion_tokens,

View file

@ -16,7 +16,7 @@ from datetime import timezone
from decimal import Decimal
import httpx
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
@ -179,10 +179,17 @@ async def run() -> None:
# Database
engine, session_factory = create_db(config)
# Anthropic client + LLM analyzer
anthropic = AsyncAnthropic(api_key=config.anthropic_api_key)
# OpenRouter client + LLM analyzer
client = AsyncOpenAI(
api_key=config.openrouter_api_key,
base_url="https://openrouter.ai/api/v1",
default_headers={
"HTTP-Referer": "https://trading.viktorbarzin.me",
"X-Title": "trading-bot meet-kevin",
},
)
analyzer = LlmAnalyzer(
client=anthropic,
client=client,
model=config.meet_kevin_llm_model,
prompt_version=config.meet_kevin_prompt_version,
)
@ -241,7 +248,7 @@ async def run() -> None:
except asyncio.TimeoutError:
pass # Normal timeout — loop again
finally:
await anthropic.close()
await client.close()
await engine.dispose()
logger.info("meet-kevin-watcher stopped gracefully")

View file

@ -1,8 +1,9 @@
"""Tests for the Claude LLM analyzer (Task 7).
"""Tests for the OpenRouter LLM analyzer (Task 7).
Tests use MagicMock/AsyncMock to avoid real API calls.
"""
import json
from datetime import datetime, timezone
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock
@ -27,17 +28,23 @@ from shared.schemas.meet_kevin import (
# Test helpers
# ---------------------------------------------------------------------------
def _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800):
"""Build a minimal mock of an Anthropic messages.create response."""
block = MagicMock()
block.type = "tool_use"
block.name = "submit_analysis"
block.input = tool_input
def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800):
"""Mock an OpenAI ChatCompletion response with one tool_call."""
tool_call = MagicMock()
tool_call.function = MagicMock()
tool_call.function.name = "submit_analysis"
tool_call.function.arguments = json.dumps(tool_args)
msg = MagicMock()
msg.tool_calls = [tool_call]
choice = MagicMock()
choice.message = msg
choice.finish_reason = "tool_calls"
resp = MagicMock()
resp.content = [block]
resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
resp.stop_reason = "tool_use"
resp.choices = [choice]
resp.usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens)
return resp
@ -63,13 +70,15 @@ def _valid_analysis_input() -> dict:
def _make_client(response=None):
"""Return a mocked AsyncAnthropic client with messages.create wired up."""
"""Return a mocked AsyncOpenAI client with chat.completions.create wired up."""
mock_create = AsyncMock(return_value=response)
mock_messages = MagicMock()
mock_messages.create = mock_create
mock_completions = MagicMock()
mock_completions.create = mock_create
mock_chat = MagicMock()
mock_chat.completions = mock_completions
client = MagicMock()
client.messages = mock_messages
client.chat = mock_chat
return client, mock_create
@ -81,11 +90,16 @@ def _make_client(response=None):
class TestComputeCostUsd:
"""Verify monetary cost calculations using Decimal arithmetic."""
def test_sonnet_46_pricing(self):
"""claude-sonnet-4-6: $3/M input + $15/M output."""
# 1M input + 1M output = $3 + $15 = $18
def test_sonnet_45_openrouter_pricing(self):
"""anthropic/claude-sonnet-4.5: $3.10/M input + $15.50/M output."""
# 1M input + 1M output = $3.10 + $15.50 = $18.60
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 1_000_000, 1_000_000)
assert result == Decimal("18.6000")
def test_sonnet_46_legacy_slug(self):
"""claude-sonnet-4-6 (legacy slug) is also priced at $3.10/$15.50."""
result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000)
assert result == Decimal("18.0000")
assert result == Decimal("18.6000")
def test_opus_47_pricing(self):
"""claude-opus-4-7: $15/M input + $75/M output."""
@ -104,21 +118,21 @@ class TestComputeCostUsd:
def test_zero_tokens(self):
"""Zero tokens produce zero cost."""
result = compute_cost_usd("claude-sonnet-4-6", 0, 0)
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0)
assert result == Decimal("0")
def test_result_is_decimal(self):
"""Return type is always Decimal, not float."""
result = compute_cost_usd("claude-sonnet-4-6", 5000, 800)
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800)
assert isinstance(result, Decimal)
def test_small_realistic_call(self):
"""Realistic 10K input + 1K output token call (Sonnet 4.6)."""
# input: 10000/1_000_000 * 3 = 0.03000
# output: 1000/1_000_000 * 15 = 0.01500
# total: 0.04500
result = compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000)
assert result == Decimal("0.0450")
"""Realistic 10K input + 1K output token call (Sonnet 4.5 via OpenRouter)."""
# input: 10000/1_000_000 * 3.10 = 0.03100
# output: 1000/1_000_000 * 15.50 = 0.01550
# total: 0.04650
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 10_000, 1_000)
assert result == Decimal("0.0465")
# ---------------------------------------------------------------------------
@ -164,7 +178,7 @@ class TestLlmCallResult:
analysis = MeetKevinAnalysis(**_valid_analysis_input())
result = LlmCallResult(
analysis=analysis,
raw_response={"stop_reason": "tool_use"},
raw_response={"finish_reason": "tool_calls"},
prompt_tokens=5000,
completion_tokens=800,
cost_usd=Decimal("0.027"),
@ -178,13 +192,13 @@ class TestLlmCallResult:
cost = Decimal("0.027")
result = LlmCallResult(
analysis=analysis,
raw_response={"stop_reason": "tool_use"},
raw_response={"finish_reason": "tool_calls"},
prompt_tokens=5000,
completion_tokens=800,
cost_usd=cost,
)
assert result.analysis is analysis
assert result.raw_response == {"stop_reason": "tool_use"}
assert result.raw_response == {"finish_reason": "tool_calls"}
assert result.prompt_tokens == 5000
assert result.completion_tokens == 800
assert result.cost_usd == cost
@ -202,10 +216,10 @@ class TestLlmAnalyzerHappyPath:
async def test_returns_llm_call_result(self):
"""analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
tool_input = _valid_analysis_input()
resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800)
resp = _make_openai_response(tool_input, in_tokens=5000, out_tokens=800)
client, mock_create = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
result = await analyzer.analyze(
title="Market Update",
description="Kevin covers the latest market trends.",
@ -226,10 +240,10 @@ class TestLlmAnalyzerHappyPath:
async def test_analysis_fields_parsed_correctly(self):
"""Parsed MeetKevinAnalysis has correct field values from tool input."""
tool_input = _valid_analysis_input()
resp = _make_anthropic_response(tool_input)
resp = _make_openai_response(tool_input)
client, _ = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
result = await analyzer.analyze(
title="Test Video",
description="Description",
@ -251,10 +265,10 @@ class TestLlmAnalyzerHappyPath:
@pytest.mark.asyncio
async def test_cost_usd_is_positive(self):
"""cost_usd is calculated and positive for a valid token count."""
resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
client, _ = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
result = await analyzer.analyze(
title="Test",
description="",
@ -267,11 +281,11 @@ class TestLlmAnalyzerHappyPath:
@pytest.mark.asyncio
async def test_api_called_with_tool_choice_forcing(self):
"""messages.create is called with tool_choice forcing submit_analysis."""
resp = _make_anthropic_response(_valid_analysis_input())
"""chat.completions.create is called with tool_choice forcing submit_analysis."""
resp = _make_openai_response(_valid_analysis_input())
client, mock_create = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
await analyzer.analyze(
title="Test",
description="",
@ -282,15 +296,15 @@ class TestLlmAnalyzerHappyPath:
mock_create.assert_called_once()
kwargs = mock_create.call_args.kwargs
assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}
assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}}
@pytest.mark.asyncio
async def test_api_called_with_cache_control_on_system(self):
"""System prompt is passed with cache_control: {type: ephemeral}."""
resp = _make_anthropic_response(_valid_analysis_input())
async def test_api_called_with_system_prompt_in_messages(self):
"""System prompt is passed as a system role message in the messages list."""
resp = _make_openai_response(_valid_analysis_input())
client, mock_create = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
await analyzer.analyze(
title="Test",
description="",
@ -300,19 +314,18 @@ class TestLlmAnalyzerHappyPath:
)
kwargs = mock_create.call_args.kwargs
system = kwargs["system"]
assert isinstance(system, list)
assert len(system) >= 1
assert system[0]["type"] == "text"
assert system[0]["cache_control"] == {"type": "ephemeral"}
messages = kwargs["messages"]
assert isinstance(messages, list)
assert messages[0]["role"] == "system"
assert SYSTEM_PROMPT in messages[0]["content"]
@pytest.mark.asyncio
async def test_api_called_with_correct_model(self):
"""messages.create is called with the model passed to LlmAnalyzer."""
resp = _make_anthropic_response(_valid_analysis_input())
"""chat.completions.create is called with the model passed to LlmAnalyzer."""
resp = _make_openai_response(_valid_analysis_input())
client, mock_create = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-opus-4-7", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
await analyzer.analyze(
title="Test",
description="",
@ -322,15 +335,15 @@ class TestLlmAnalyzerHappyPath:
)
kwargs = mock_create.call_args.kwargs
assert kwargs["model"] == "claude-opus-4-7"
assert kwargs["model"] == "anthropic/claude-sonnet-4.5"
@pytest.mark.asyncio
async def test_api_called_with_submit_analysis_tool(self):
"""Tool definition includes name='submit_analysis'."""
resp = _make_anthropic_response(_valid_analysis_input())
"""Tool definition includes function name 'submit_analysis'."""
resp = _make_openai_response(_valid_analysis_input())
client, mock_create = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
await analyzer.analyze(
title="Test",
description="",
@ -341,15 +354,18 @@ class TestLlmAnalyzerHappyPath:
kwargs = mock_create.call_args.kwargs
tools = kwargs["tools"]
assert any(t.get("name") == "submit_analysis" for t in tools)
assert any(
t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis"
for t in tools
)
@pytest.mark.asyncio
async def test_raw_response_is_captured(self):
"""raw_response in LlmCallResult holds serializable dict."""
resp = _make_anthropic_response(_valid_analysis_input())
resp = _make_openai_response(_valid_analysis_input())
client, _ = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
result = await analyzer.analyze(
title="Test",
description="",
@ -363,7 +379,7 @@ class TestLlmAnalyzerHappyPath:
@pytest.mark.asyncio
async def test_transcript_segments_included_in_user_message(self):
"""User message contains timestamped segment lines from transcript_segments."""
resp = _make_anthropic_response(_valid_analysis_input())
resp = _make_openai_response(_valid_analysis_input())
client, mock_create = _make_client(resp)
segments = [
@ -371,7 +387,7 @@ class TestLlmAnalyzerHappyPath:
{"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
]
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
await analyzer.analyze(
title="Test",
description="",
@ -381,8 +397,8 @@ class TestLlmAnalyzerHappyPath:
)
kwargs = mock_create.call_args.kwargs
user_content = kwargs["messages"][0]["content"]
# The user message should contain the segment text
# user message is the second entry in messages list
user_content = kwargs["messages"][1]["content"]
assert "Hello world." in user_content
assert "Let's talk stocks." in user_content
@ -396,22 +412,23 @@ class TestLlmAnalyzerFailurePaths:
"""Failure path tests."""
@pytest.mark.asyncio
async def test_no_tool_use_block_raises_value_error(self):
"""If response has no tool_use block, raises ValueError mentioning tool_use."""
# Response with a text block instead of tool_use
text_block = MagicMock()
text_block.type = "text"
text_block.text = "Here is my analysis..."
async def test_no_tool_calls_raises_value_error(self):
"""If response message has no tool_calls, raises ValueError."""
msg = MagicMock()
msg.tool_calls = None
choice = MagicMock()
choice.message = msg
choice.finish_reason = "stop"
resp = MagicMock()
resp.content = [text_block]
resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
resp.stop_reason = "end_turn"
resp.choices = [choice]
resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
client, _ = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
with pytest.raises(ValueError, match="tool_use"):
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
with pytest.raises(ValueError):
await analyzer.analyze(
title="Test",
description="",
@ -421,16 +438,22 @@ class TestLlmAnalyzerFailurePaths:
)
@pytest.mark.asyncio
async def test_empty_content_raises_value_error(self):
"""If response content is empty, raises ValueError."""
async def test_empty_tool_calls_raises_value_error(self):
"""If response message has empty tool_calls list, raises ValueError."""
msg = MagicMock()
msg.tool_calls = []
choice = MagicMock()
choice.message = msg
choice.finish_reason = "stop"
resp = MagicMock()
resp.content = []
resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
resp.stop_reason = "tool_use"
resp.choices = [choice]
resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
client, _ = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
with pytest.raises(ValueError):
await analyzer.analyze(
title="Test",
@ -446,10 +469,10 @@ class TestLlmAnalyzerFailurePaths:
bad_input = _valid_analysis_input()
bad_input["market_outlook_direction"] = "extremely_bullish" # not a valid enum
resp = _make_anthropic_response(bad_input)
resp = _make_openai_response(bad_input)
client, _ = _make_client(resp)
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
with pytest.raises(Exception): # pydantic ValidationError or ValueError
await analyzer.analyze(
title="Test",