refactor(meet-kevin): switch LLM analyzer to OpenRouter via OpenAI SDK
User's Vault has openrouter_api_key but no direct sk-ant-* Anthropic key. OpenRouter passes through Claude Sonnet 4.6 (~3% markup over Anthropic list pricing) and matches the existing gpt_mini_endpoint pattern used by recruiter-responder.

- Replace anthropic.AsyncAnthropic with openai.AsyncOpenAI + base_url
- Convert Anthropic tool-use API to OpenAI function-calling
- System prompt unchanged (analyst instructions are model-agnostic)
- Drop cache_control (not in OpenAI API); revisit later if cost matters
- Model slug: anthropic/claude-sonnet-4.5 (OpenRouter's current Claude tier)
- Pricing: $3.10/M input, $15.50/M output (OpenRouter pass-through)
- Config field anthropic_api_key -> openrouter_api_key
- pyproject extras: anthropic>=0.40 -> openai>=1.50

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
3c20c8c12c
commit
89f01ad9c0
5 changed files with 244 additions and 216 deletions
|
|
@ -1,8 +1,9 @@
|
|||
"""Tests for the Claude LLM analyzer (Task 7).
|
||||
"""Tests for the OpenRouter LLM analyzer (Task 7).
|
||||
|
||||
Tests use MagicMock/AsyncMock to avoid real API calls.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from decimal import Decimal
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
|
@ -27,17 +28,23 @@ from shared.schemas.meet_kevin import (
|
|||
# Test helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800):
|
||||
"""Build a minimal mock of an Anthropic messages.create response."""
|
||||
block = MagicMock()
|
||||
block.type = "tool_use"
|
||||
block.name = "submit_analysis"
|
||||
block.input = tool_input
|
||||
def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800):
|
||||
"""Mock an OpenAI ChatCompletion response with one tool_call."""
|
||||
tool_call = MagicMock()
|
||||
tool_call.function = MagicMock()
|
||||
tool_call.function.name = "submit_analysis"
|
||||
tool_call.function.arguments = json.dumps(tool_args)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.tool_calls = [tool_call]
|
||||
|
||||
choice = MagicMock()
|
||||
choice.message = msg
|
||||
choice.finish_reason = "tool_calls"
|
||||
|
||||
resp = MagicMock()
|
||||
resp.content = [block]
|
||||
resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
|
||||
resp.stop_reason = "tool_use"
|
||||
resp.choices = [choice]
|
||||
resp.usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens)
|
||||
return resp
|
||||
|
||||
|
||||
|
|
@ -63,13 +70,15 @@ def _valid_analysis_input() -> dict:
|
|||
|
||||
|
||||
def _make_client(response=None):
|
||||
"""Return a mocked AsyncAnthropic client with messages.create wired up."""
|
||||
"""Return a mocked AsyncOpenAI client with chat.completions.create wired up."""
|
||||
mock_create = AsyncMock(return_value=response)
|
||||
mock_messages = MagicMock()
|
||||
mock_messages.create = mock_create
|
||||
mock_completions = MagicMock()
|
||||
mock_completions.create = mock_create
|
||||
mock_chat = MagicMock()
|
||||
mock_chat.completions = mock_completions
|
||||
|
||||
client = MagicMock()
|
||||
client.messages = mock_messages
|
||||
client.chat = mock_chat
|
||||
return client, mock_create
|
||||
|
||||
|
||||
|
|
@ -81,11 +90,16 @@ def _make_client(response=None):
|
|||
class TestComputeCostUsd:
|
||||
"""Verify monetary cost calculations using Decimal arithmetic."""
|
||||
|
||||
def test_sonnet_46_pricing(self):
|
||||
"""claude-sonnet-4-6: $3/M input + $15/M output."""
|
||||
# 1M input + 1M output = $3 + $15 = $18
|
||||
def test_sonnet_45_openrouter_pricing(self):
|
||||
"""anthropic/claude-sonnet-4.5: $3.10/M input + $15.50/M output."""
|
||||
# 1M input + 1M output = $3.10 + $15.50 = $18.60
|
||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 1_000_000, 1_000_000)
|
||||
assert result == Decimal("18.6000")
|
||||
|
||||
def test_sonnet_46_legacy_slug(self):
|
||||
"""claude-sonnet-4-6 (legacy slug) is also priced at $3.10/$15.50."""
|
||||
result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000)
|
||||
assert result == Decimal("18.0000")
|
||||
assert result == Decimal("18.6000")
|
||||
|
||||
def test_opus_47_pricing(self):
|
||||
"""claude-opus-4-7: $15/M input + $75/M output."""
|
||||
|
|
@ -104,21 +118,21 @@ class TestComputeCostUsd:
|
|||
|
||||
def test_zero_tokens(self):
|
||||
"""Zero tokens produce zero cost."""
|
||||
result = compute_cost_usd("claude-sonnet-4-6", 0, 0)
|
||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0)
|
||||
assert result == Decimal("0")
|
||||
|
||||
def test_result_is_decimal(self):
|
||||
"""Return type is always Decimal, not float."""
|
||||
result = compute_cost_usd("claude-sonnet-4-6", 5000, 800)
|
||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800)
|
||||
assert isinstance(result, Decimal)
|
||||
|
||||
def test_small_realistic_call(self):
|
||||
"""Realistic 10K input + 1K output token call (Sonnet 4.6)."""
|
||||
# input: 10000/1_000_000 * 3 = 0.03000
|
||||
# output: 1000/1_000_000 * 15 = 0.01500
|
||||
# total: 0.04500
|
||||
result = compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000)
|
||||
assert result == Decimal("0.0450")
|
||||
"""Realistic 10K input + 1K output token call (Sonnet 4.5 via OpenRouter)."""
|
||||
# input: 10000/1_000_000 * 3.10 = 0.03100
|
||||
# output: 1000/1_000_000 * 15.50 = 0.01550
|
||||
# total: 0.04650
|
||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 10_000, 1_000)
|
||||
assert result == Decimal("0.0465")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -164,7 +178,7 @@ class TestLlmCallResult:
|
|||
analysis = MeetKevinAnalysis(**_valid_analysis_input())
|
||||
result = LlmCallResult(
|
||||
analysis=analysis,
|
||||
raw_response={"stop_reason": "tool_use"},
|
||||
raw_response={"finish_reason": "tool_calls"},
|
||||
prompt_tokens=5000,
|
||||
completion_tokens=800,
|
||||
cost_usd=Decimal("0.027"),
|
||||
|
|
@ -178,13 +192,13 @@ class TestLlmCallResult:
|
|||
cost = Decimal("0.027")
|
||||
result = LlmCallResult(
|
||||
analysis=analysis,
|
||||
raw_response={"stop_reason": "tool_use"},
|
||||
raw_response={"finish_reason": "tool_calls"},
|
||||
prompt_tokens=5000,
|
||||
completion_tokens=800,
|
||||
cost_usd=cost,
|
||||
)
|
||||
assert result.analysis is analysis
|
||||
assert result.raw_response == {"stop_reason": "tool_use"}
|
||||
assert result.raw_response == {"finish_reason": "tool_calls"}
|
||||
assert result.prompt_tokens == 5000
|
||||
assert result.completion_tokens == 800
|
||||
assert result.cost_usd == cost
|
||||
|
|
@ -202,10 +216,10 @@ class TestLlmAnalyzerHappyPath:
|
|||
async def test_returns_llm_call_result(self):
|
||||
"""analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
|
||||
tool_input = _valid_analysis_input()
|
||||
resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800)
|
||||
resp = _make_openai_response(tool_input, in_tokens=5000, out_tokens=800)
|
||||
client, mock_create = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
result = await analyzer.analyze(
|
||||
title="Market Update",
|
||||
description="Kevin covers the latest market trends.",
|
||||
|
|
@ -226,10 +240,10 @@ class TestLlmAnalyzerHappyPath:
|
|||
async def test_analysis_fields_parsed_correctly(self):
|
||||
"""Parsed MeetKevinAnalysis has correct field values from tool input."""
|
||||
tool_input = _valid_analysis_input()
|
||||
resp = _make_anthropic_response(tool_input)
|
||||
resp = _make_openai_response(tool_input)
|
||||
client, _ = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
result = await analyzer.analyze(
|
||||
title="Test Video",
|
||||
description="Description",
|
||||
|
|
@ -251,10 +265,10 @@ class TestLlmAnalyzerHappyPath:
|
|||
@pytest.mark.asyncio
|
||||
async def test_cost_usd_is_positive(self):
|
||||
"""cost_usd is calculated and positive for a valid token count."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
|
||||
resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
|
||||
client, _ = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
result = await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -267,11 +281,11 @@ class TestLlmAnalyzerHappyPath:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_called_with_tool_choice_forcing(self):
|
||||
"""messages.create is called with tool_choice forcing submit_analysis."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input())
|
||||
"""chat.completions.create is called with tool_choice forcing submit_analysis."""
|
||||
resp = _make_openai_response(_valid_analysis_input())
|
||||
client, mock_create = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -282,15 +296,15 @@ class TestLlmAnalyzerHappyPath:
|
|||
|
||||
mock_create.assert_called_once()
|
||||
kwargs = mock_create.call_args.kwargs
|
||||
assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}
|
||||
assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_called_with_cache_control_on_system(self):
|
||||
"""System prompt is passed with cache_control: {type: ephemeral}."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input())
|
||||
async def test_api_called_with_system_prompt_in_messages(self):
|
||||
"""System prompt is passed as a system role message in the messages list."""
|
||||
resp = _make_openai_response(_valid_analysis_input())
|
||||
client, mock_create = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -300,19 +314,18 @@ class TestLlmAnalyzerHappyPath:
|
|||
)
|
||||
|
||||
kwargs = mock_create.call_args.kwargs
|
||||
system = kwargs["system"]
|
||||
assert isinstance(system, list)
|
||||
assert len(system) >= 1
|
||||
assert system[0]["type"] == "text"
|
||||
assert system[0]["cache_control"] == {"type": "ephemeral"}
|
||||
messages = kwargs["messages"]
|
||||
assert isinstance(messages, list)
|
||||
assert messages[0]["role"] == "system"
|
||||
assert SYSTEM_PROMPT in messages[0]["content"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_called_with_correct_model(self):
|
||||
"""messages.create is called with the model passed to LlmAnalyzer."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input())
|
||||
"""chat.completions.create is called with the model passed to LlmAnalyzer."""
|
||||
resp = _make_openai_response(_valid_analysis_input())
|
||||
client, mock_create = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-opus-4-7", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -322,15 +335,15 @@ class TestLlmAnalyzerHappyPath:
|
|||
)
|
||||
|
||||
kwargs = mock_create.call_args.kwargs
|
||||
assert kwargs["model"] == "claude-opus-4-7"
|
||||
assert kwargs["model"] == "anthropic/claude-sonnet-4.5"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_called_with_submit_analysis_tool(self):
|
||||
"""Tool definition includes name='submit_analysis'."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input())
|
||||
"""Tool definition includes function name 'submit_analysis'."""
|
||||
resp = _make_openai_response(_valid_analysis_input())
|
||||
client, mock_create = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -341,15 +354,18 @@ class TestLlmAnalyzerHappyPath:
|
|||
|
||||
kwargs = mock_create.call_args.kwargs
|
||||
tools = kwargs["tools"]
|
||||
assert any(t.get("name") == "submit_analysis" for t in tools)
|
||||
assert any(
|
||||
t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis"
|
||||
for t in tools
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_raw_response_is_captured(self):
|
||||
"""raw_response in LlmCallResult holds serializable dict."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input())
|
||||
resp = _make_openai_response(_valid_analysis_input())
|
||||
client, _ = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
result = await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -363,7 +379,7 @@ class TestLlmAnalyzerHappyPath:
|
|||
@pytest.mark.asyncio
|
||||
async def test_transcript_segments_included_in_user_message(self):
|
||||
"""User message contains timestamped segment lines from transcript_segments."""
|
||||
resp = _make_anthropic_response(_valid_analysis_input())
|
||||
resp = _make_openai_response(_valid_analysis_input())
|
||||
client, mock_create = _make_client(resp)
|
||||
|
||||
segments = [
|
||||
|
|
@ -371,7 +387,7 @@ class TestLlmAnalyzerHappyPath:
|
|||
{"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
|
||||
]
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -381,8 +397,8 @@ class TestLlmAnalyzerHappyPath:
|
|||
)
|
||||
|
||||
kwargs = mock_create.call_args.kwargs
|
||||
user_content = kwargs["messages"][0]["content"]
|
||||
# The user message should contain the segment text
|
||||
# user message is the second entry in messages list
|
||||
user_content = kwargs["messages"][1]["content"]
|
||||
assert "Hello world." in user_content
|
||||
assert "Let's talk stocks." in user_content
|
||||
|
||||
|
|
@ -396,22 +412,23 @@ class TestLlmAnalyzerFailurePaths:
|
|||
"""Failure path tests."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_tool_use_block_raises_value_error(self):
|
||||
"""If response has no tool_use block, raises ValueError mentioning tool_use."""
|
||||
# Response with a text block instead of tool_use
|
||||
text_block = MagicMock()
|
||||
text_block.type = "text"
|
||||
text_block.text = "Here is my analysis..."
|
||||
async def test_no_tool_calls_raises_value_error(self):
|
||||
"""If response message has no tool_calls, raises ValueError."""
|
||||
msg = MagicMock()
|
||||
msg.tool_calls = None
|
||||
|
||||
choice = MagicMock()
|
||||
choice.message = msg
|
||||
choice.finish_reason = "stop"
|
||||
|
||||
resp = MagicMock()
|
||||
resp.content = [text_block]
|
||||
resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
|
||||
resp.stop_reason = "end_turn"
|
||||
resp.choices = [choice]
|
||||
resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
|
||||
|
||||
client, _ = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
with pytest.raises(ValueError, match="tool_use"):
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
with pytest.raises(ValueError):
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
description="",
|
||||
|
|
@ -421,16 +438,22 @@ class TestLlmAnalyzerFailurePaths:
|
|||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_content_raises_value_error(self):
|
||||
"""If response content is empty, raises ValueError."""
|
||||
async def test_empty_tool_calls_raises_value_error(self):
|
||||
"""If response message has empty tool_calls list, raises ValueError."""
|
||||
msg = MagicMock()
|
||||
msg.tool_calls = []
|
||||
|
||||
choice = MagicMock()
|
||||
choice.message = msg
|
||||
choice.finish_reason = "stop"
|
||||
|
||||
resp = MagicMock()
|
||||
resp.content = []
|
||||
resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
|
||||
resp.stop_reason = "tool_use"
|
||||
resp.choices = [choice]
|
||||
resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
|
||||
|
||||
client, _ = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
with pytest.raises(ValueError):
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
|
|
@ -446,10 +469,10 @@ class TestLlmAnalyzerFailurePaths:
|
|||
bad_input = _valid_analysis_input()
|
||||
bad_input["market_outlook_direction"] = "extremely_bullish" # not a valid enum
|
||||
|
||||
resp = _make_anthropic_response(bad_input)
|
||||
resp = _make_openai_response(bad_input)
|
||||
client, _ = _make_client(resp)
|
||||
|
||||
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
|
||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
||||
with pytest.raises(Exception): # pydantic ValidationError or ValueError
|
||||
await analyzer.analyze(
|
||||
title="Test",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue