From 89f01ad9c07d81059a352afe03d100c1f96e872b Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 22 May 2026 09:52:55 +0000 Subject: [PATCH] refactor(meet-kevin): switch LLM analyzer to OpenRouter via OpenAI SDK User's Vault has openrouter_api_key but no direct sk-ant-* Anthropic key. OpenRouter passes through Claude Sonnet 4.5 (replacing the direct claude-sonnet-4-6 default; ~3% markup over Anthropic list pricing) and matches the existing gpt_mini_endpoint pattern used by recruiter-responder. - Replace anthropic.AsyncAnthropic with openai.AsyncOpenAI + base_url - Convert Anthropic tool-use API to OpenAI function-calling - System prompt unchanged (analyst instructions are model-agnostic) - Drop cache_control (not in OpenAI API); revisit later if cost matters - Model slug: anthropic/claude-sonnet-4.5 (OpenRouter's current Claude tier) - Pricing: $3.10/M input, $15.50/M output (OpenRouter pass-through) - Config field anthropic_api_key -> openrouter_api_key - pyproject extras: anthropic>=0.40 -> openai>=1.50 Co-Authored-By: Claude Opus 4.7 --- pyproject.toml | 2 +- services/meet_kevin_watcher/config.py | 4 +- services/meet_kevin_watcher/llm_analyzer.py | 252 +++++++++--------- services/meet_kevin_watcher/main.py | 17 +- .../meet_kevin_watcher/test_llm_analyzer.py | 185 +++++++------ 5 files changed, 244 insertions(+), 216 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 36be7f3..da170e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ news = ["feedparser>=6.0", "praw>=7.7", "asyncpraw>=7.7", "httpx>=0.27"] sentiment = ["transformers>=4.38", "torch>=2.2", "ollama>=0.1"] trading = ["alpaca-py>=0.21", "pytz>=2024.1", "yfinance>=0.2", "httpx>=0.27"] backtester = ["numpy>=1.26", "pandas>=2.2"] -meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "anthropic>=0.40", "httpx>=0.27"] +meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "openai>=1.50", "httpx>=0.27"] dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=4.1", "ruff>=0.3", "mypy>=1.8", "httpx>=0.27"] [build-system] 
diff --git a/services/meet_kevin_watcher/config.py b/services/meet_kevin_watcher/config.py index 7d3dbd5..229ca34 100644 --- a/services/meet_kevin_watcher/config.py +++ b/services/meet_kevin_watcher/config.py @@ -18,12 +18,12 @@ class MeetKevinWatcherConfig(BaseConfig): # LLM analysis settings meet_kevin_max_llm_retries: int = 3 - meet_kevin_llm_model: str = "claude-sonnet-4-6" + meet_kevin_llm_model: str = "anthropic/claude-sonnet-4.5" meet_kevin_prompt_version: str = "v1" meet_kevin_daily_cost_cap_usd: float = 5.0 # API credentials - anthropic_api_key: str = "" + openrouter_api_key: str = "" # Runtime settings meet_kevin_workdir: str = "/tmp/meet_kevin_captions" diff --git a/services/meet_kevin_watcher/llm_analyzer.py b/services/meet_kevin_watcher/llm_analyzer.py index 0768912..35008e6 100644 --- a/services/meet_kevin_watcher/llm_analyzer.py +++ b/services/meet_kevin_watcher/llm_analyzer.py @@ -1,9 +1,7 @@ -"""Claude LLM analyzer for Meet Kevin video transcripts. +"""OpenRouter LLM analyzer for Meet Kevin video transcripts. -Calls Claude Sonnet 4.6 with tool-use forcing to extract structured -MeetKevinAnalysis from a video transcript. Uses prompt caching on the -system block to reduce cost across videos processed within the same -5-minute window. +Calls Claude Sonnet (via OpenRouter) with function-calling forcing to extract +structured MeetKevinAnalysis from a video transcript. 
Public API: SYSTEM_PROMPT — module-level analyst instructions @@ -12,13 +10,14 @@ Public API: LlmAnalyzer — async class; .analyze() does the API call """ +import json import logging from dataclasses import dataclass from datetime import datetime from decimal import Decimal from typing import Any -from anthropic import AsyncAnthropic +from openai import AsyncOpenAI from shared.schemas.meet_kevin import MeetKevinAnalysis @@ -26,12 +25,16 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Pricing table (USD per 1 000 000 tokens: input, output) +# OpenRouter pass-through pricing (~3% markup over Anthropic list) # --------------------------------------------------------------------------- _PRICING: dict[str, tuple[Decimal, Decimal]] = { - "claude-sonnet-4-6": (Decimal("3"), Decimal("15")), + "claude-sonnet-4-6": (Decimal("3.10"), Decimal("15.50")), "claude-opus-4-7": (Decimal("15"), Decimal("75")), "claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")), + # OpenRouter model slugs + "anthropic/claude-sonnet-4.5": (Decimal("3.10"), Decimal("15.50")), + "anthropic/claude-sonnet-4.6": (Decimal("3.10"), Decimal("15.50")), } # --------------------------------------------------------------------------- @@ -138,97 +141,99 @@ Now read the transcript provided in the user message and call `submit_analysis`. """.strip() # --------------------------------------------------------------------------- -# Tool definition (JSON Schema mirroring MeetKevinAnalysis) +# Tool definition (OpenAI function-calling format) # --------------------------------------------------------------------------- -_ANALYSIS_TOOL: dict[str, Any] = { - "name": "submit_analysis", - "description": ( - "Submit a structured analysis of a Meet Kevin video transcript. " - "Call this exactly once with all fields filled in." 
- ), - "input_schema": { - "type": "object", - "required": [ - "market_outlook_direction", - "market_outlook_reasoning", - "macro_themes", - "key_risks", - "summary", - "tickers", - ], - "properties": { - "market_outlook_direction": { - "type": "string", - "enum": ["bullish", "neutral", "bearish", "mixed"], - "description": "Overall market sentiment direction", - }, - "market_outlook_reasoning": { - "type": "string", - "description": "2-4 sentence explanation of the market outlook direction", - }, - "macro_themes": { - "type": "array", - "items": {"type": "string"}, - "description": "2-6 high-level macro economic themes discussed", - }, - "key_risks": { - "type": "array", - "items": {"type": "string"}, - "description": "2-5 principal downside risks Kevin mentions", - }, - "summary": { - "type": "string", - "description": "~200-word plain-English investment thesis summary", - }, - "tickers": { - "type": "array", - "description": "Per-ticker mentions with action and conviction", - "items": { - "type": "object", - "required": [ - "symbol", - "action", - "conviction", - "time_horizon", - "rationale_quote", - "video_timestamp_seconds", - ], - "properties": { - "symbol": { - "type": "string", - "description": "Uppercase ticker symbol (1-6 chars)", - }, - "action": { - "type": "string", - "enum": ["buy", "sell", "hold", "watch", "avoid"], - "description": "Recommendation action", - }, - "conviction": { - "type": "number", - "minimum": 0.0, - "maximum": 1.0, - "description": "Confidence in recommendation (0.0-1.0)", - }, - "time_horizon": { - "type": "string", - "enum": [ - "intraday", - "days", - "weeks", - "months", - "long_term", - "unspecified", - ], - "description": "Time horizon for the recommendation", - }, - "rationale_quote": { - "type": "string", - "description": "Short verbatim or paraphrased quote from video", - }, - "video_timestamp_seconds": { - "type": ["integer", "null"], - "description": "Timestamp in seconds for deep-link target", +_ANALYSIS_TOOL_OPENAI: 
dict[str, Any] = { + "type": "function", + "function": { + "name": "submit_analysis", + "description": ( + "Submit the structured analysis of one Meet Kevin video. Call this exactly once." + ), + "parameters": { + "type": "object", + "required": [ + "market_outlook_direction", + "market_outlook_reasoning", + "macro_themes", + "key_risks", + "summary", + "tickers", + ], + "properties": { + "market_outlook_direction": { + "type": "string", + "enum": ["bullish", "neutral", "bearish", "mixed"], + "description": "Overall market sentiment direction", + }, + "market_outlook_reasoning": { + "type": "string", + "description": "2-4 sentence explanation of the market outlook direction", + }, + "macro_themes": { + "type": "array", + "items": {"type": "string"}, + "description": "2-6 high-level macro economic themes discussed", + }, + "key_risks": { + "type": "array", + "items": {"type": "string"}, + "description": "2-5 principal downside risks Kevin mentions", + }, + "summary": { + "type": "string", + "description": "~200-word plain-English investment thesis summary", + }, + "tickers": { + "type": "array", + "description": "Per-ticker mentions with action and conviction", + "items": { + "type": "object", + "required": [ + "symbol", + "action", + "conviction", + "time_horizon", + "rationale_quote", + "video_timestamp_seconds", + ], + "properties": { + "symbol": { + "type": "string", + "description": "Uppercase ticker symbol (1-6 chars)", + }, + "action": { + "type": "string", + "enum": ["buy", "sell", "hold", "watch", "avoid"], + "description": "Recommendation action", + }, + "conviction": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Confidence in recommendation (0.0-1.0)", + }, + "time_horizon": { + "type": "string", + "enum": [ + "intraday", + "days", + "weeks", + "months", + "long_term", + "unspecified", + ], + "description": "Time horizon for the recommendation", + }, + "rationale_quote": { + "type": "string", + "description": "Short verbatim 
or paraphrased quote from video", + }, + "video_timestamp_seconds": { + "type": ["integer", "null"], + "description": "Timestamp in seconds for deep-link target", + }, }, }, }, @@ -291,15 +296,15 @@ _MAX_SEGMENTS = 1000 class LlmAnalyzer: - """Calls Claude to extract structured analysis from a video transcript. + """Calls Claude (via OpenRouter) to extract structured analysis from a video transcript. Args: - client: Configured AsyncAnthropic client. - model: Model identifier (e.g. "claude-sonnet-4-6"). + client: Configured AsyncOpenAI client pointed at OpenRouter. + model: Model identifier (e.g. "anthropic/claude-sonnet-4.5"). prompt_version: Prompt version string stored in kevin_analyses. """ - def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None: + def __init__(self, client: AsyncOpenAI, model: str, prompt_version: str) -> None: self._client = client self._model = model self._prompt_version = prompt_version @@ -313,7 +318,7 @@ class LlmAnalyzer: transcript_text: str, transcript_segments: list[dict], ) -> LlmCallResult: - """Run Claude analysis on a transcript and return a structured result. + """Run LLM analysis on a transcript and return a structured result. Args: title: Video title. @@ -326,8 +331,8 @@ class LlmAnalyzer: LlmCallResult with parsed MeetKevinAnalysis and token accounting. Raises: - ValueError: If the response contains no tool_use block. - pydantic.ValidationError: If tool_use input fails schema validation. + ValueError: If the response contains no tool_calls. + pydantic.ValidationError: If function arguments fail schema validation. 
""" user_msg = self._build_user_message( title=title, @@ -337,42 +342,35 @@ class LlmAnalyzer: transcript_segments=transcript_segments, ) - response = await self._client.messages.create( + response = await self._client.chat.completions.create( model=self._model, max_tokens=4096, - system=[ - { - "type": "text", - "text": SYSTEM_PROMPT, - "cache_control": {"type": "ephemeral"}, - } + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": user_msg}, ], - tools=[_ANALYSIS_TOOL], - tool_choice={"type": "tool", "name": "submit_analysis"}, - messages=[{"role": "user", "content": user_msg}], + tools=[_ANALYSIS_TOOL_OPENAI], + tool_choice={"type": "function", "function": {"name": "submit_analysis"}}, ) - # Find the first tool_use block - tool_block = next( - (b for b in response.content if b.type == "tool_use"), - None, - ) - if tool_block is None: + message = response.choices[0].message + if not message.tool_calls: raise ValueError( - f"Claude response contained no tool_use block " - f"(stop_reason={response.stop_reason!r})" + "LLM response contained no tool_calls (expected submit_analysis function call)" ) - analysis = MeetKevinAnalysis.model_validate(tool_block.input) + tool_call = message.tool_calls[0] + tool_input = json.loads(tool_call.function.arguments) + analysis = MeetKevinAnalysis.model_validate(tool_input) - prompt_tokens: int = response.usage.input_tokens - completion_tokens: int = response.usage.output_tokens + prompt_tokens: int = response.usage.prompt_tokens + completion_tokens: int = response.usage.completion_tokens cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens) raw_response: dict = { - "stop_reason": response.stop_reason, - "tool_name": tool_block.name, - "tool_input": tool_block.input, + "finish_reason": response.choices[0].finish_reason, + "tool_name": tool_call.function.name, + "tool_input": tool_input, "usage": { "input_tokens": prompt_tokens, "output_tokens": completion_tokens, diff --git 
a/services/meet_kevin_watcher/main.py b/services/meet_kevin_watcher/main.py index 40f151e..b79ff8e 100644 --- a/services/meet_kevin_watcher/main.py +++ b/services/meet_kevin_watcher/main.py @@ -16,7 +16,7 @@ from datetime import timezone from decimal import Decimal import httpx -from anthropic import AsyncAnthropic +from openai import AsyncOpenAI from sqlalchemy import select from sqlalchemy.dialects.postgresql import insert as pg_insert @@ -179,10 +179,17 @@ async def run() -> None: # Database engine, session_factory = create_db(config) - # Anthropic client + LLM analyzer - anthropic = AsyncAnthropic(api_key=config.anthropic_api_key) + # OpenRouter client + LLM analyzer + client = AsyncOpenAI( + api_key=config.openrouter_api_key, + base_url="https://openrouter.ai/api/v1", + default_headers={ + "HTTP-Referer": "https://trading.viktorbarzin.me", + "X-Title": "trading-bot meet-kevin", + }, + ) analyzer = LlmAnalyzer( - client=anthropic, + client=client, model=config.meet_kevin_llm_model, prompt_version=config.meet_kevin_prompt_version, ) @@ -241,7 +248,7 @@ async def run() -> None: except asyncio.TimeoutError: pass # Normal timeout — loop again finally: - await anthropic.close() + await client.close() await engine.dispose() logger.info("meet-kevin-watcher stopped gracefully") diff --git a/tests/services/meet_kevin_watcher/test_llm_analyzer.py b/tests/services/meet_kevin_watcher/test_llm_analyzer.py index a576aa4..ccb90a2 100644 --- a/tests/services/meet_kevin_watcher/test_llm_analyzer.py +++ b/tests/services/meet_kevin_watcher/test_llm_analyzer.py @@ -1,8 +1,9 @@ -"""Tests for the Claude LLM analyzer (Task 7). +"""Tests for the OpenRouter LLM analyzer (Task 7). Tests use MagicMock/AsyncMock to avoid real API calls. 
""" +import json from datetime import datetime, timezone from decimal import Decimal from unittest.mock import AsyncMock, MagicMock @@ -27,17 +28,23 @@ from shared.schemas.meet_kevin import ( # Test helpers # --------------------------------------------------------------------------- -def _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800): - """Build a minimal mock of an Anthropic messages.create response.""" - block = MagicMock() - block.type = "tool_use" - block.name = "submit_analysis" - block.input = tool_input +def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800): + """Mock an OpenAI ChatCompletion response with one tool_call.""" + tool_call = MagicMock() + tool_call.function = MagicMock() + tool_call.function.name = "submit_analysis" + tool_call.function.arguments = json.dumps(tool_args) + + msg = MagicMock() + msg.tool_calls = [tool_call] + + choice = MagicMock() + choice.message = msg + choice.finish_reason = "tool_calls" resp = MagicMock() - resp.content = [block] - resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens) - resp.stop_reason = "tool_use" + resp.choices = [choice] + resp.usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens) return resp @@ -63,13 +70,15 @@ def _valid_analysis_input() -> dict: def _make_client(response=None): - """Return a mocked AsyncAnthropic client with messages.create wired up.""" + """Return a mocked AsyncOpenAI client with chat.completions.create wired up.""" mock_create = AsyncMock(return_value=response) - mock_messages = MagicMock() - mock_messages.create = mock_create + mock_completions = MagicMock() + mock_completions.create = mock_create + mock_chat = MagicMock() + mock_chat.completions = mock_completions client = MagicMock() - client.messages = mock_messages + client.chat = mock_chat return client, mock_create @@ -81,11 +90,16 @@ def _make_client(response=None): class TestComputeCostUsd: """Verify monetary cost calculations 
using Decimal arithmetic.""" - def test_sonnet_46_pricing(self): - """claude-sonnet-4-6: $3/M input + $15/M output.""" - # 1M input + 1M output = $3 + $15 = $18 + def test_sonnet_45_openrouter_pricing(self): + """anthropic/claude-sonnet-4.5: $3.10/M input + $15.50/M output.""" + # 1M input + 1M output = $3.10 + $15.50 = $18.60 + result = compute_cost_usd("anthropic/claude-sonnet-4.5", 1_000_000, 1_000_000) + assert result == Decimal("18.6000") + + def test_sonnet_46_legacy_slug(self): + """claude-sonnet-4-6 (legacy slug) is also priced at $3.10/$15.50.""" result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000) - assert result == Decimal("18.0000") + assert result == Decimal("18.6000") def test_opus_47_pricing(self): """claude-opus-4-7: $15/M input + $75/M output.""" @@ -104,21 +118,21 @@ class TestComputeCostUsd: def test_zero_tokens(self): """Zero tokens produce zero cost.""" - result = compute_cost_usd("claude-sonnet-4-6", 0, 0) + result = compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0) assert result == Decimal("0") def test_result_is_decimal(self): """Return type is always Decimal, not float.""" - result = compute_cost_usd("claude-sonnet-4-6", 5000, 800) + result = compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800) assert isinstance(result, Decimal) def test_small_realistic_call(self): - """Realistic 10K input + 1K output token call (Sonnet 4.6).""" - # input: 10000/1_000_000 * 3 = 0.03000 - # output: 1000/1_000_000 * 15 = 0.01500 - # total: 0.04500 - result = compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000) - assert result == Decimal("0.0450") + """Realistic 10K input + 1K output token call (Sonnet 4.5 via OpenRouter).""" + # input: 10000/1_000_000 * 3.10 = 0.03100 + # output: 1000/1_000_000 * 15.50 = 0.01550 + # total: 0.04650 + result = compute_cost_usd("anthropic/claude-sonnet-4.5", 10_000, 1_000) + assert result == Decimal("0.0465") # --------------------------------------------------------------------------- @@ -164,7 +178,7 
@@ class TestLlmCallResult: analysis = MeetKevinAnalysis(**_valid_analysis_input()) result = LlmCallResult( analysis=analysis, - raw_response={"stop_reason": "tool_use"}, + raw_response={"finish_reason": "tool_calls"}, prompt_tokens=5000, completion_tokens=800, cost_usd=Decimal("0.027"), @@ -178,13 +192,13 @@ class TestLlmCallResult: cost = Decimal("0.027") result = LlmCallResult( analysis=analysis, - raw_response={"stop_reason": "tool_use"}, + raw_response={"finish_reason": "tool_calls"}, prompt_tokens=5000, completion_tokens=800, cost_usd=cost, ) assert result.analysis is analysis - assert result.raw_response == {"stop_reason": "tool_use"} + assert result.raw_response == {"finish_reason": "tool_calls"} assert result.prompt_tokens == 5000 assert result.completion_tokens == 800 assert result.cost_usd == cost @@ -202,10 +216,10 @@ class TestLlmAnalyzerHappyPath: async def test_returns_llm_call_result(self): """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis.""" tool_input = _valid_analysis_input() - resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800) + resp = _make_openai_response(tool_input, in_tokens=5000, out_tokens=800) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") result = await analyzer.analyze( title="Market Update", description="Kevin covers the latest market trends.", @@ -226,10 +240,10 @@ class TestLlmAnalyzerHappyPath: async def test_analysis_fields_parsed_correctly(self): """Parsed MeetKevinAnalysis has correct field values from tool input.""" tool_input = _valid_analysis_input() - resp = _make_anthropic_response(tool_input) + resp = _make_openai_response(tool_input) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, 
model="anthropic/claude-sonnet-4.5", prompt_version="v1") result = await analyzer.analyze( title="Test Video", description="Description", @@ -251,10 +265,10 @@ class TestLlmAnalyzerHappyPath: @pytest.mark.asyncio async def test_cost_usd_is_positive(self): """cost_usd is calculated and positive for a valid token count.""" - resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000) + resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") result = await analyzer.analyze( title="Test", description="", @@ -267,11 +281,11 @@ class TestLlmAnalyzerHappyPath: @pytest.mark.asyncio async def test_api_called_with_tool_choice_forcing(self): - """messages.create is called with tool_choice forcing submit_analysis.""" - resp = _make_anthropic_response(_valid_analysis_input()) + """chat.completions.create is called with tool_choice forcing submit_analysis.""" + resp = _make_openai_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -282,15 +296,15 @@ class TestLlmAnalyzerHappyPath: mock_create.assert_called_once() kwargs = mock_create.call_args.kwargs - assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"} + assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}} @pytest.mark.asyncio - async def test_api_called_with_cache_control_on_system(self): - """System prompt is passed with cache_control: {type: ephemeral}.""" - resp = _make_anthropic_response(_valid_analysis_input()) + 
async def test_api_called_with_system_prompt_in_messages(self): + """System prompt is passed as a system role message in the messages list.""" + resp = _make_openai_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -300,19 +314,18 @@ class TestLlmAnalyzerHappyPath: ) kwargs = mock_create.call_args.kwargs - system = kwargs["system"] - assert isinstance(system, list) - assert len(system) >= 1 - assert system[0]["type"] == "text" - assert system[0]["cache_control"] == {"type": "ephemeral"} + messages = kwargs["messages"] + assert isinstance(messages, list) + assert messages[0]["role"] == "system" + assert SYSTEM_PROMPT in messages[0]["content"] @pytest.mark.asyncio async def test_api_called_with_correct_model(self): - """messages.create is called with the model passed to LlmAnalyzer.""" - resp = _make_anthropic_response(_valid_analysis_input()) + """chat.completions.create is called with the model passed to LlmAnalyzer.""" + resp = _make_openai_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-opus-4-7", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -322,15 +335,15 @@ class TestLlmAnalyzerHappyPath: ) kwargs = mock_create.call_args.kwargs - assert kwargs["model"] == "claude-opus-4-7" + assert kwargs["model"] == "anthropic/claude-sonnet-4.5" @pytest.mark.asyncio async def test_api_called_with_submit_analysis_tool(self): - """Tool definition includes name='submit_analysis'.""" - resp = _make_anthropic_response(_valid_analysis_input()) + """Tool definition includes function name 'submit_analysis'.""" + 
resp = _make_openai_response(_valid_analysis_input()) client, mock_create = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -341,15 +354,18 @@ class TestLlmAnalyzerHappyPath: kwargs = mock_create.call_args.kwargs tools = kwargs["tools"] - assert any(t.get("name") == "submit_analysis" for t in tools) + assert any( + t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis" + for t in tools + ) @pytest.mark.asyncio async def test_raw_response_is_captured(self): """raw_response in LlmCallResult holds serializable dict.""" - resp = _make_anthropic_response(_valid_analysis_input()) + resp = _make_openai_response(_valid_analysis_input()) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") result = await analyzer.analyze( title="Test", description="", @@ -363,7 +379,7 @@ class TestLlmAnalyzerHappyPath: @pytest.mark.asyncio async def test_transcript_segments_included_in_user_message(self): """User message contains timestamped segment lines from transcript_segments.""" - resp = _make_anthropic_response(_valid_analysis_input()) + resp = _make_openai_response(_valid_analysis_input()) client, mock_create = _make_client(resp) segments = [ @@ -371,7 +387,7 @@ class TestLlmAnalyzerHappyPath: {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."}, ] - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") await analyzer.analyze( title="Test", description="", @@ -381,8 +397,8 @@ class TestLlmAnalyzerHappyPath: ) kwargs = mock_create.call_args.kwargs - 
user_content = kwargs["messages"][0]["content"] - # The user message should contain the segment text + # user message is the second entry in messages list + user_content = kwargs["messages"][1]["content"] assert "Hello world." in user_content assert "Let's talk stocks." in user_content @@ -396,22 +412,23 @@ class TestLlmAnalyzerFailurePaths: """Failure path tests.""" @pytest.mark.asyncio - async def test_no_tool_use_block_raises_value_error(self): - """If response has no tool_use block, raises ValueError mentioning tool_use.""" - # Response with a text block instead of tool_use - text_block = MagicMock() - text_block.type = "text" - text_block.text = "Here is my analysis..." + async def test_no_tool_calls_raises_value_error(self): + """If response message has no tool_calls, raises ValueError.""" + msg = MagicMock() + msg.tool_calls = None + + choice = MagicMock() + choice.message = msg + choice.finish_reason = "stop" resp = MagicMock() - resp.content = [text_block] - resp.usage = MagicMock(input_tokens=5000, output_tokens=800) - resp.stop_reason = "end_turn" + resp.choices = [choice] + resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") - with pytest.raises(ValueError, match="tool_use"): + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") + with pytest.raises(ValueError): await analyzer.analyze( title="Test", description="", @@ -421,16 +438,22 @@ class TestLlmAnalyzerFailurePaths: ) @pytest.mark.asyncio - async def test_empty_content_raises_value_error(self): - """If response content is empty, raises ValueError.""" + async def test_empty_tool_calls_raises_value_error(self): + """If response message has empty tool_calls list, raises ValueError.""" + msg = MagicMock() + msg.tool_calls = [] + + choice = MagicMock() + choice.message = msg + choice.finish_reason = "stop" + resp = MagicMock() - 
resp.content = [] - resp.usage = MagicMock(input_tokens=5000, output_tokens=800) - resp.stop_reason = "tool_use" + resp.choices = [choice] + resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") with pytest.raises(ValueError): await analyzer.analyze( title="Test", @@ -446,10 +469,10 @@ class TestLlmAnalyzerFailurePaths: bad_input = _valid_analysis_input() bad_input["market_outlook_direction"] = "extremely_bullish" # not a valid enum - resp = _make_anthropic_response(bad_input) + resp = _make_openai_response(bad_input) client, _ = _make_client(resp) - analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1") + analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1") with pytest.raises(Exception): # pydantic ValidationError or ValueError await analyzer.analyze( title="Test",