From 8a1d03a9674f26c29251445f272c63e054e3c371 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 22 May 2026 19:24:40 +0000 Subject: [PATCH] refactor(meet-kevin): switch LLM back to native Anthropic SDK with OAuth bearer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous refactor (89f01ad) moved to OpenRouter because no sk-ant-api-* key was found in Vault. Turns out claude-agent-service-spare-{1,2} hold sk-ant-oat01-* OAuth tokens (108 chars, scope user:inference, 1-year TTL, minted via 'claude setup-token' — see memory id=832). These tokens work with the Anthropic SDK via the auth_token= constructor argument (routes to Authorization: Bearer ... instead of x-api-key: ...). They consume the Enterprise Claude subscription quota rather than per-call billing, so the OpenRouter zero-credit problem goes away. - llm_analyzer.py: revert OpenAI client to AsyncAnthropic; tool-use API + cache_control restored - config.py: openrouter_api_key -> anthropic_oauth_token; model slug reverted from anthropic/claude-sonnet-4.5 -> claude-sonnet-4-5 - main.py: AsyncOpenAI -> AsyncAnthropic(auth_token=...), drop OpenRouter attribution headers - pyproject: openai>=1.50 -> anthropic>=0.40 in meet_kevin extras - tests: mocks ported back to messages.create + tool_use blocks --- pyproject.toml | 2 +- services/meet_kevin_watcher/config.py | 4 +- services/meet_kevin_watcher/llm_analyzer.py | 245 +++++++++--------- services/meet_kevin_watcher/main.py | 13 +- .../meet_kevin_watcher/test_llm_analyzer.py | 182 ++++++------- 5 files changed, 211 insertions(+), 235 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index da170e6..36be7f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ news = ["feedparser>=6.0", "praw>=7.7", "asyncpraw>=7.7", "httpx>=0.27"] sentiment = ["transformers>=4.38", "torch>=2.2", "ollama>=0.1"] trading = ["alpaca-py>=0.21", "pytz>=2024.1", "yfinance>=0.2", "httpx>=0.27"] backtester = ["numpy>=1.26", "pandas>=2.2"] -meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "openai>=1.50", "httpx>=0.27"] +meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "anthropic>=0.40", "httpx>=0.27"] dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=4.1", "ruff>=0.3", "mypy>=1.8", "httpx>=0.27"] [build-system] diff --git a/services/meet_kevin_watcher/config.py b/services/meet_kevin_watcher/config.py index 229ca34..99d55e9 100644 --- a/services/meet_kevin_watcher/config.py +++ b/services/meet_kevin_watcher/config.py @@ -18,12 +18,12 @@ class MeetKevinWatcherConfig(BaseConfig): # LLM analysis settings meet_kevin_max_llm_retries: int = 3 - meet_kevin_llm_model: str = "anthropic/claude-sonnet-4.5" + meet_kevin_llm_model: str = "claude-sonnet-4-5" meet_kevin_prompt_version: str = "v1" meet_kevin_daily_cost_cap_usd: float = 5.0 # API credentials - openrouter_api_key: str = "" + anthropic_oauth_token: str = "" # Runtime settings meet_kevin_workdir: str = "/tmp/meet_kevin_captions" diff --git a/services/meet_kevin_watcher/llm_analyzer.py b/services/meet_kevin_watcher/llm_analyzer.py index 35008e6..9ccc655 100644 --- a/services/meet_kevin_watcher/llm_analyzer.py +++ b/services/meet_kevin_watcher/llm_analyzer.py @@ -1,7 +1,7 @@ -"""OpenRouter LLM analyzer for Meet Kevin video transcripts. +"""Anthropic SDK LLM analyzer for Meet Kevin video transcripts. -Calls Claude Sonnet (via OpenRouter) with function-calling forcing to extract -structured MeetKevinAnalysis from a video transcript. +Calls Claude Sonnet (via native Anthropic SDK with OAuth bearer token) with +tool-use forcing to extract structured MeetKevinAnalysis from a video transcript. Public API: SYSTEM_PROMPT — module-level analyst instructions @@ -10,14 +10,13 @@ Public API: LlmAnalyzer — async class; .analyze() does the API call """ -import json import logging from dataclasses import dataclass from datetime import datetime from decimal import Decimal from typing import Any -from openai import AsyncOpenAI +from anthropic import AsyncAnthropic from shared.schemas.meet_kevin import MeetKevinAnalysis @@ -25,16 +24,16 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Pricing table (USD per 1 000 000 tokens: input, output) -# OpenRouter pass-through pricing (~3% markup over Anthropic list) +# Native Anthropic list pricing. With OAuth/Enterprise tokens real billing +# is via subscription quota, but we still compute notional USD for the +# daily-cap accounting logic. # --------------------------------------------------------------------------- _PRICING: dict[str, tuple[Decimal, Decimal]] = { - "claude-sonnet-4-6": (Decimal("3.10"), Decimal("15.50")), + "claude-sonnet-4-5": (Decimal("3"), Decimal("15")), + "claude-sonnet-4-6": (Decimal("3"), Decimal("15")), "claude-opus-4-7": (Decimal("15"), Decimal("75")), "claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")), - # OpenRouter model slugs - "anthropic/claude-sonnet-4.5": (Decimal("3.10"), Decimal("15.50")), - "anthropic/claude-sonnet-4.6": (Decimal("3.10"), Decimal("15.50")), } # --------------------------------------------------------------------------- @@ -141,99 +140,96 @@ Now read the transcript provided in the user message and call `submit_analysis`. """.strip() # --------------------------------------------------------------------------- -# Tool definition (OpenAI function-calling format) +# Tool definition (Anthropic tool-use format) # --------------------------------------------------------------------------- -_ANALYSIS_TOOL_OPENAI: dict[str, Any] = { - "type": "function", - "function": { - "name": "submit_analysis", - "description": ( - "Submit the structured analysis of one Meet Kevin video. Call this exactly once." - ), - "parameters": { - "type": "object", - "required": [ - "market_outlook_direction", - "market_outlook_reasoning", - "macro_themes", - "key_risks", - "summary", - "tickers", - ], - "properties": { - "market_outlook_direction": { - "type": "string", - "enum": ["bullish", "neutral", "bearish", "mixed"], - "description": "Overall market sentiment direction", - }, - "market_outlook_reasoning": { - "type": "string", - "description": "2-4 sentence explanation of the market outlook direction", - }, - "macro_themes": { - "type": "array", - "items": {"type": "string"}, - "description": "2-6 high-level macro economic themes discussed", - }, - "key_risks": { - "type": "array", - "items": {"type": "string"}, - "description": "2-5 principal downside risks Kevin mentions", - }, - "summary": { - "type": "string", - "description": "~200-word plain-English investment thesis summary", - }, - "tickers": { - "type": "array", - "description": "Per-ticker mentions with action and conviction", - "items": { - "type": "object", - "required": [ - "symbol", - "action", - "conviction", - "time_horizon", - "rationale_quote", - "video_timestamp_seconds", - ], - "properties": { - "symbol": { - "type": "string", - "description": "Uppercase ticker symbol (1-6 chars)", - }, - "action": { - "type": "string", - "enum": ["buy", "sell", "hold", "watch", "avoid"], - "description": "Recommendation action", - }, - "conviction": { - "type": "number", - "minimum": 0.0, - "maximum": 1.0, - "description": "Confidence in recommendation (0.0-1.0)", - }, - "time_horizon": { - "type": "string", - "enum": [ - "intraday", - "days", - "weeks", - "months", - "long_term", - "unspecified", - ], - "description": "Time horizon for the recommendation", - }, - "rationale_quote": { - "type": "string", - "description": "Short verbatim or paraphrased quote from video", - }, - "video_timestamp_seconds": { - "type": ["integer", "null"], - "description": "Timestamp in seconds for deep-link target", - }, +_ANALYSIS_TOOL: dict[str, Any] = { + "name": "submit_analysis", + "description": ( + "Submit the structured analysis of one Meet Kevin video. Call this exactly once." + ), + "input_schema": { + "type": "object", + "required": [ + "market_outlook_direction", + "market_outlook_reasoning", + "macro_themes", + "key_risks", + "summary", + "tickers", + ], + "properties": { + "market_outlook_direction": { + "type": "string", + "enum": ["bullish", "neutral", "bearish", "mixed"], + "description": "Overall market sentiment direction", + }, + "market_outlook_reasoning": { + "type": "string", + "description": "2-4 sentence explanation of the market outlook direction", + }, + "macro_themes": { + "type": "array", + "items": {"type": "string"}, + "description": "2-6 high-level macro economic themes discussed", + }, + "key_risks": { + "type": "array", + "items": {"type": "string"}, + "description": "2-5 principal downside risks Kevin mentions", + }, + "summary": { + "type": "string", + "description": "~200-word plain-English investment thesis summary", + }, + "tickers": { + "type": "array", + "description": "Per-ticker mentions with action and conviction", + "items": { + "type": "object", + "required": [ + "symbol", + "action", + "conviction", + "time_horizon", + "rationale_quote", + "video_timestamp_seconds", + ], + "properties": { + "symbol": { + "type": "string", + "description": "Uppercase ticker symbol (1-6 chars)", + }, + "action": { + "type": "string", + "enum": ["buy", "sell", "hold", "watch", "avoid"], + "description": "Recommendation action", + }, + "conviction": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Confidence in recommendation (0.0-1.0)", + }, + "time_horizon": { + "type": "string", + "enum": [ + "intraday", + "days", + "weeks", + "months", + "long_term", + "unspecified", + ], + "description": "Time horizon for the recommendation", + }, + "rationale_quote": { + "type": "string", + "description": "Short verbatim or paraphrased quote from video", + }, + "video_timestamp_seconds": { + "type": ["integer", "null"], + "description": "Timestamp in seconds for deep-link target", }, }, }, @@ -296,15 +292,15 @@ _MAX_SEGMENTS = 1000 class LlmAnalyzer: - """Calls Claude (via OpenRouter) to extract structured analysis from a video transcript. + """Calls Claude (via native Anthropic SDK) to extract structured analysis from a video transcript. Args: - client: Configured AsyncOpenAI client pointed at OpenRouter. - model: Model identifier (e.g. "anthropic/claude-sonnet-4.5"). + client: Configured AsyncAnthropic client with OAuth bearer token. + model: Model identifier (e.g. "claude-sonnet-4-5"). prompt_version: Prompt version string stored in kevin_analyses. """ - def __init__(self, client: AsyncOpenAI, model: str, prompt_version: str) -> None: + def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None: self._client = client self._model = model self._prompt_version = prompt_version @@ -331,8 +327,8 @@ class LlmAnalyzer: LlmCallResult with parsed MeetKevinAnalysis and token accounting. Raises: - ValueError: If the response contains no tool_calls. - pydantic.ValidationError: If function arguments fail schema validation. + ValueError: If the response contains no tool_use block. + pydantic.ValidationError: If tool input fails schema validation. """ user_msg = self._build_user_message( title=title, @@ -342,34 +338,39 @@ class LlmAnalyzer: transcript_segments=transcript_segments, ) - response = await self._client.chat.completions.create( + response = await self._client.messages.create( model=self._model, max_tokens=4096, - messages=[ - {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": user_msg}, + system=[ + {"type": "text", "text": SYSTEM_PROMPT, "cache_control": {"type": "ephemeral"}} ], - tools=[_ANALYSIS_TOOL_OPENAI], - tool_choice={"type": "function", "function": {"name": "submit_analysis"}}, + tools=[_ANALYSIS_TOOL], + tool_choice={"type": "tool", "name": "submit_analysis"}, + messages=[{"role": "user", "content": user_msg}], ) - message = response.choices[0].message - if not message.tool_calls: + # Find the first tool_use block in the response + tool_use_block = None + for block in response.content: + if block.type == "tool_use": + tool_use_block = block + break + + if tool_use_block is None: raise ValueError( - "LLM response contained no tool_calls (expected submit_analysis function call)" + "LLM response contained no tool_use block (expected submit_analysis call)" ) - tool_call = message.tool_calls[0] - tool_input = json.loads(tool_call.function.arguments) + tool_input: dict = tool_use_block.input analysis = MeetKevinAnalysis.model_validate(tool_input) - prompt_tokens: int = response.usage.prompt_tokens - completion_tokens: int = response.usage.completion_tokens + prompt_tokens: int = response.usage.input_tokens + completion_tokens: int = response.usage.output_tokens cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens) raw_response: dict = { - "finish_reason": response.choices[0].finish_reason, - "tool_name": tool_call.function.name, + "stop_reason": response.stop_reason, + "tool_name": tool_use_block.name, "tool_input": tool_input, "usage": { "input_tokens": prompt_tokens, diff --git a/services/meet_kevin_watcher/main.py b/services/meet_kevin_watcher/main.py index b79ff8e..0e8f96a 100644 --- a/services/meet_kevin_watcher/main.py +++ b/services/meet_kevin_watcher/main.py @@ -16,7 +16,7 @@ from datetime import timezone from decimal import Decimal import httpx -from openai import AsyncOpenAI +from anthropic import AsyncAnthropic from sqlalchemy import select from sqlalchemy.dialects.postgresql import insert as pg_insert @@ -179,14 +179,9 @@ async def run() -> None: # Database engine, session_factory = create_db(config) - # OpenRouter client + LLM analyzer - client = AsyncOpenAI( - api_key=config.openrouter_api_key, - base_url="https://openrouter.ai/api/v1", - default_headers={ - "HTTP-Referer": "https://trading.viktorbarzin.me", - "X-Title": "trading-bot meet-kevin", - }, + # Anthropic client + LLM analyzer (OAuth bearer token) + client = AsyncAnthropic( + auth_token=config.anthropic_oauth_token, ) analyzer = LlmAnalyzer( client=client, diff --git a/tests/services/meet_kevin_watcher/test_llm_analyzer.py b/tests/services/meet_kevin_watcher/test_llm_analyzer.py index ccb90a2..b09446c 100644 --- a/tests/services/meet_kevin_watcher/test_llm_analyzer.py +++ b/tests/services/meet_kevin_watcher/test_llm_analyzer.py @@ -1,9 +1,8 @@ -"""Tests for the OpenRouter LLM analyzer (Task 7). +"""Tests for the Anthropic SDK LLM analyzer (Task 7). Tests use MagicMock/AsyncMock to avoid real API calls. """ -import json from datetime import datetime, timezone from decimal import Decimal from unittest.mock import AsyncMock, MagicMock @@ -28,23 +27,17 @@ from shared.schemas.meet_kevin import ( # Test helpers # --------------------------------------------------------------------------- -def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800): - """Mock an OpenAI ChatCompletion response with one tool_call.""" - tool_call = MagicMock() - tool_call.function = MagicMock() - tool_call.function.name = "submit_analysis" - tool_call.function.arguments = json.dumps(tool_args) - - msg = MagicMock() - msg.tool_calls = [tool_call] - - choice = MagicMock() - choice.message = msg - choice.finish_reason = "tool_calls" +def _make_anthropic_response(tool_input: dict, in_tokens: int = 5000, out_tokens: int = 800): + """Mock an Anthropic Messages response with one tool_use block.""" + block = MagicMock() + block.type = "tool_use" + block.name = "submit_analysis" + block.input = tool_input resp = MagicMock() - resp.choices = [choice] - resp.usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens) + resp.content = [block] + resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens) + resp.stop_reason = "tool_use" return resp @@ -70,15 +63,13 @@ def _valid_analysis_input() -> dict: def _make_client(response=None): - """Return a mocked AsyncOpenAI client with chat.completions.create wired up.""" + """Return a mocked AsyncAnthropic client with messages.create wired up.""" mock_create = AsyncMock(return_value=response) - mock_completions = MagicMock() - mock_completions.create = mock_create - mock_chat = MagicMock() - mock_chat.completions = mock_completions + mock_messages = MagicMock() + mock_messages.create = mock_create client = MagicMock() - client.chat = mock_chat + client.messages = mock_messages return client, mock_create @@ -90,16 +81,16 @@ def _make_client(response=None): class TestComputeCostUsd: """Verify monetary cost calculations using Decimal arithmetic.""" - def test_sonnet_45_openrouter_pricing(self): - """anthropic/claude-sonnet-4.5: $3.10/M input + $15.50/M output.""" - # 1M input + 1M output = $3.10 + $15.50 = $18.60 - result = compute_cost_usd("anthropic/claude-sonnet-4.5", 1_000_000, 1_000_000) - assert result == Decimal("18.6000") + def test_sonnet_45_native_pricing(self): + """claude-sonnet-4-5: $3/M input + $15/M output = $18/M total.""" + # 1M input + 1M output = $3 + $15 = $18 + result = compute_cost_usd("claude-sonnet-4-5", 1_000_000, 1_000_000) + assert result == Decimal("18.0000") - def test_sonnet_46_legacy_slug(self): - """claude-sonnet-4-6 (legacy slug) is also priced at $3.10/$15.50.""" + def test_sonnet_46_native_pricing(self): + """claude-sonnet-4-6: same pricing as 4-5 ($3/$15).""" result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000) - assert result == Decimal("18.6000") + assert result == Decimal("18.0000") def test_opus_47_pricing(self): """claude-opus-4-7: $15/M input + $75/M output.""" @@ -118,21 +109,21 @@ class TestComputeCostUsd: def test_zero_tokens(self): """Zero tokens produce zero cost.""" - result = compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0) + result = compute_cost_usd("claude-sonnet-4-5", 0, 0) assert result == Decimal("0") def test_result_is_decimal(self): """Return type is always Decimal, not float.""" - result = compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800) + result = compute_cost_usd("claude-sonnet-4-5", 5000, 800) assert isinstance(result, Decimal) def test_small_realistic_call(self): - """Realistic 10K input + 1K output token call (Sonnet 4.5 via OpenRouter).""" - # input: 10000/1_000_000 * 3.10 = 0.03100 - # output: 1000/1_000_000 * 15.50 = 0.01550 - # total: 0.04650 - result = compute_cost_usd("anthropic/claude-sonnet-4.5", 10_000, 1_000) - assert result == Decimal("0.0465") + """Realistic 10K input + 1K output token call (Sonnet 4.5 native).""" + # input: 10000/1_000_000 * 3 = 0.03000 + # output: 1000/1_000_000 * 15 = 0.01500 + # total: 0.04500 + result = compute_cost_usd("claude-sonnet-4-5", 10_000, 1_000) + assert result == Decimal("0.0450") # --------------------------------------------------------------------------- @@ -178,7 +169,7 @@ class TestLlmCallResult: analysis = MeetKevinAnalysis(**_valid_analysis_input()) result = LlmCallResult( analysis=analysis, - raw_response={"finish_reason": "tool_calls"}, + raw_response={"stop_reason": "tool_use"}, prompt_tokens=5000, completion_tokens=800, cost_usd=Decimal("0.027"), @@ -192,13 +183,13 @@ class TestLlmCallResult: cost = Decimal("0.027") result = LlmCallResult( analysis=analysis, - raw_response={"finish_reason": "tool_calls"}, + raw_response={"stop_reason": "tool_use"}, prompt_tokens=5000, completion_tokens=800, cost_usd=cost, ) assert result.analysis is analysis - assert result.raw_response == {"finish_reason": "tool_calls"} + assert result.raw_response == {"stop_reason": "tool_use"} assert result.prompt_tokens == 5000 assert result.completion_tokens == 800 assert result.cost_usd == cost @@ -216,10 +207,10 @@ class TestLlmAnalyzerHappyPath: async def test_returns_llm_call_result(self): """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis.""" tool_input = _valid_analysis_input() - resp = _make_openai_response(tool_input, in_tokens=5000, out_tokens=800) + resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") result = await analyzer.analyze( title="Market Update", description="Kevin covers the latest market trends.", @@ -240,10 +231,10 @@ class TestLlmAnalyzerHappyPath: async def test_analysis_fields_parsed_correctly(self): """Parsed MeetKevinAnalysis has correct field values from tool input.""" tool_input = _valid_analysis_input() - resp = _make_openai_response(tool_input) + resp = _make_anthropic_response(tool_input) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") result = await analyzer.analyze( title="Test Video", description="Description", @@ -265,10 +256,10 @@ class TestLlmAnalyzerHappyPath: @pytest.mark.asyncio async def test_cost_usd_is_positive(self): """cost_usd is calculated and positive for a valid token count.""" - resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000) + resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") result = await analyzer.analyze( title="Test", description="", @@ -281,11 +272,11 @@ class TestLlmAnalyzerHappyPath: @pytest.mark.asyncio async def test_api_called_with_tool_choice_forcing(self): - """chat.completions.create is called with tool_choice forcing submit_analysis.""" - resp = _make_openai_response(_valid_analysis_input()) + """messages.create is called with tool_choice forcing submit_analysis.""" + resp = _make_anthropic_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -296,15 +287,15 @@ class TestLlmAnalyzerHappyPath: mock_create.assert_called_once() kwargs = mock_create.call_args.kwargs - assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}} + assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"} @pytest.mark.asyncio - async def test_api_called_with_system_prompt_in_messages(self): - """System prompt is passed as a system role message in the messages list.""" - resp = _make_openai_response(_valid_analysis_input()) + async def test_api_called_with_system_prompt_in_system_param(self): + """System prompt is passed as the system parameter (list with cache_control).""" + resp = _make_anthropic_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -314,18 +305,19 @@ class TestLlmAnalyzerHappyPath: ) kwargs = mock_create.call_args.kwargs - messages = kwargs["messages"] - assert isinstance(messages, list) - assert messages[0]["role"] == "system" - assert SYSTEM_PROMPT in messages[0]["content"] + system = kwargs["system"] + assert isinstance(system, list) + assert system[0]["type"] == "text" + assert SYSTEM_PROMPT in system[0]["text"] + assert system[0]["cache_control"] == {"type": "ephemeral"} @pytest.mark.asyncio async def test_api_called_with_correct_model(self): - """chat.completions.create is called with the model passed to LlmAnalyzer.""" - resp = _make_openai_response(_valid_analysis_input()) + """messages.create is called with the model passed to LlmAnalyzer.""" + resp = _make_anthropic_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -335,15 +327,15 @@ class TestLlmAnalyzerHappyPath: ) kwargs = mock_create.call_args.kwargs - assert kwargs["model"] == "anthropic/claude-sonnet-4.5" + assert kwargs["model"] == "claude-sonnet-4-5" @pytest.mark.asyncio async def test_api_called_with_submit_analysis_tool(self): - """Tool definition includes function name 'submit_analysis'.""" - resp = _make_openai_response(_valid_analysis_input()) + """Tool definition includes name 'submit_analysis' with input_schema.""" + resp = _make_anthropic_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -355,17 +347,17 @@ class TestLlmAnalyzerHappyPath: kwargs = mock_create.call_args.kwargs tools = kwargs["tools"] assert any( - t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis" + t.get("name") == "submit_analysis" and "input_schema" in t for t in tools ) @pytest.mark.asyncio async def test_raw_response_is_captured(self): """raw_response in LlmCallResult holds serializable dict.""" - resp = _make_openai_response(_valid_analysis_input()) + resp = _make_anthropic_response(_valid_analysis_input()) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") result = await analyzer.analyze( title="Test", description="", @@ -379,7 +371,7 @@ class TestLlmAnalyzerHappyPath: @pytest.mark.asyncio async def test_transcript_segments_included_in_user_message(self): """User message contains timestamped segment lines from transcript_segments.""" - resp = _make_openai_response(_valid_analysis_input()) + resp = _make_anthropic_response(_valid_analysis_input()) client, mock_create = _make_client(resp) segments = [ @@ -387,7 +379,7 @@ class TestLlmAnalyzerHappyPath: {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."}, ] - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -397,8 +389,8 @@ class TestLlmAnalyzerHappyPath: ) kwargs = mock_create.call_args.kwargs - # user message is the second entry in messages list - user_content = kwargs["messages"][1]["content"] + # user message is in the messages list + user_content = kwargs["messages"][0]["content"] assert "Hello world." in user_content assert "Let's talk stocks." in user_content @@ -412,23 +404,17 @@ class TestLlmAnalyzerFailurePaths: """Failure path tests.""" @pytest.mark.asyncio - async def test_no_tool_calls_raises_value_error(self): - """If response message has no tool_calls, raises ValueError.""" - msg = MagicMock() - msg.tool_calls = None - - choice = MagicMock() - choice.message = msg - choice.finish_reason = "stop" - + async def test_no_tool_use_block_raises_value_error(self): + """If response has no tool_use block, raises ValueError containing 'tool_use'.""" resp = MagicMock() - resp.choices = [choice] - resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800) + resp.content = [MagicMock(type="text")] + resp.usage = MagicMock(input_tokens=5000, output_tokens=800) + resp.stop_reason = "end_turn" client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") - with pytest.raises(ValueError): + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") + with pytest.raises(ValueError, match="tool_use"): await analyzer.analyze( title="Test", description="", @@ -438,22 +424,16 @@ class TestLlmAnalyzerFailurePaths: ) @pytest.mark.asyncio - async def test_empty_tool_calls_raises_value_error(self): - """If response message has empty tool_calls list, raises ValueError.""" - msg = MagicMock() - msg.tool_calls = [] - - choice = MagicMock() - choice.message = msg - choice.finish_reason = "stop" - + async def test_empty_content_raises_value_error(self): + """If response.content is empty, raises ValueError.""" resp = MagicMock() - resp.choices = [choice] - resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800) + resp.content = [] + resp.usage = MagicMock(input_tokens=5000, output_tokens=800) + resp.stop_reason = "end_turn" client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") with pytest.raises(ValueError): await analyzer.analyze( title="Test", @@ -469,10 +449,10 @@ class TestLlmAnalyzerFailurePaths: bad_input = _valid_analysis_input() bad_input["market_outlook_direction"] = "extremely_bullish" # not a valid enum - resp = _make_openai_response(bad_input) + resp = _make_anthropic_response(bad_input) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1") with pytest.raises(Exception): # pydantic ValidationError or ValueError await analyzer.analyze( title="Test",