refactor(meet-kevin): switch LLM back to native Anthropic SDK with OAuth bearer
The previous refactor (89f01ad) moved to OpenRouter because no
sk-ant-api-* key was found in Vault. It turns out that
claude-agent-service-spare-{1,2} hold sk-ant-oat01-* OAuth tokens
(108 chars, scope user:inference, 1-year TTL, minted via
'claude setup-token' — see memory id=832).
These tokens work with the Anthropic SDK via the auth_token= constructor
argument, which sends them as an "Authorization: Bearer ..." header
instead of "x-api-key: ...". They draw on the Enterprise Claude
subscription quota rather than per-call billing, so the OpenRouter
zero-credit problem goes away.
- llm_analyzer.py: revert the OpenAI client to AsyncAnthropic; restore the
  tool-use API and cache_control
- config.py: rename openrouter_api_key to anthropic_oauth_token; revert the
  model slug from anthropic/claude-sonnet-4.5 to claude-sonnet-4-5
- main.py: replace AsyncOpenAI with AsyncAnthropic(auth_token=...); drop the
  OpenRouter attribution headers
- pyproject: replace openai>=1.50 with anthropic>=0.40 in the meet_kevin extras
- tests: port mocks back to messages.create + tool_use blocks
This commit is contained in:
parent
4f4d365652
commit
8a1d03a967
5 changed files with 211 additions and 235 deletions
|
|
@ -20,7 +20,7 @@ news = ["feedparser>=6.0", "praw>=7.7", "asyncpraw>=7.7", "httpx>=0.27"]
|
||||||
sentiment = ["transformers>=4.38", "torch>=2.2", "ollama>=0.1"]
|
sentiment = ["transformers>=4.38", "torch>=2.2", "ollama>=0.1"]
|
||||||
trading = ["alpaca-py>=0.21", "pytz>=2024.1", "yfinance>=0.2", "httpx>=0.27"]
|
trading = ["alpaca-py>=0.21", "pytz>=2024.1", "yfinance>=0.2", "httpx>=0.27"]
|
||||||
backtester = ["numpy>=1.26", "pandas>=2.2"]
|
backtester = ["numpy>=1.26", "pandas>=2.2"]
|
||||||
meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "openai>=1.50", "httpx>=0.27"]
|
meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "anthropic>=0.40", "httpx>=0.27"]
|
||||||
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=4.1", "ruff>=0.3", "mypy>=1.8", "httpx>=0.27"]
|
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=4.1", "ruff>=0.3", "mypy>=1.8", "httpx>=0.27"]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
||||||
|
|
@ -18,12 +18,12 @@ class MeetKevinWatcherConfig(BaseConfig):
|
||||||
|
|
||||||
# LLM analysis settings
|
# LLM analysis settings
|
||||||
meet_kevin_max_llm_retries: int = 3
|
meet_kevin_max_llm_retries: int = 3
|
||||||
meet_kevin_llm_model: str = "anthropic/claude-sonnet-4.5"
|
meet_kevin_llm_model: str = "claude-sonnet-4-5"
|
||||||
meet_kevin_prompt_version: str = "v1"
|
meet_kevin_prompt_version: str = "v1"
|
||||||
meet_kevin_daily_cost_cap_usd: float = 5.0
|
meet_kevin_daily_cost_cap_usd: float = 5.0
|
||||||
|
|
||||||
# API credentials
|
# API credentials
|
||||||
openrouter_api_key: str = ""
|
anthropic_oauth_token: str = ""
|
||||||
|
|
||||||
# Runtime settings
|
# Runtime settings
|
||||||
meet_kevin_workdir: str = "/tmp/meet_kevin_captions"
|
meet_kevin_workdir: str = "/tmp/meet_kevin_captions"
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
"""OpenRouter LLM analyzer for Meet Kevin video transcripts.
|
"""Anthropic SDK LLM analyzer for Meet Kevin video transcripts.
|
||||||
|
|
||||||
Calls Claude Sonnet (via OpenRouter) with function-calling forcing to extract
|
Calls Claude Sonnet (via native Anthropic SDK with OAuth bearer token) with
|
||||||
structured MeetKevinAnalysis from a video transcript.
|
tool-use forcing to extract structured MeetKevinAnalysis from a video transcript.
|
||||||
|
|
||||||
Public API:
|
Public API:
|
||||||
SYSTEM_PROMPT — module-level analyst instructions
|
SYSTEM_PROMPT — module-level analyst instructions
|
||||||
|
|
@ -10,14 +10,13 @@ Public API:
|
||||||
LlmAnalyzer — async class; .analyze() does the API call
|
LlmAnalyzer — async class; .analyze() does the API call
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from openai import AsyncOpenAI
|
from anthropic import AsyncAnthropic
|
||||||
|
|
||||||
from shared.schemas.meet_kevin import MeetKevinAnalysis
|
from shared.schemas.meet_kevin import MeetKevinAnalysis
|
||||||
|
|
||||||
|
|
@ -25,16 +24,16 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Pricing table (USD per 1 000 000 tokens: input, output)
|
# Pricing table (USD per 1 000 000 tokens: input, output)
|
||||||
# OpenRouter pass-through pricing (~3% markup over Anthropic list)
|
# Native Anthropic list pricing. With OAuth/Enterprise tokens real billing
|
||||||
|
# is via subscription quota, but we still compute notional USD for the
|
||||||
|
# daily-cap accounting logic.
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_PRICING: dict[str, tuple[Decimal, Decimal]] = {
|
_PRICING: dict[str, tuple[Decimal, Decimal]] = {
|
||||||
"claude-sonnet-4-6": (Decimal("3.10"), Decimal("15.50")),
|
"claude-sonnet-4-5": (Decimal("3"), Decimal("15")),
|
||||||
|
"claude-sonnet-4-6": (Decimal("3"), Decimal("15")),
|
||||||
"claude-opus-4-7": (Decimal("15"), Decimal("75")),
|
"claude-opus-4-7": (Decimal("15"), Decimal("75")),
|
||||||
"claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
|
"claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
|
||||||
# OpenRouter model slugs
|
|
||||||
"anthropic/claude-sonnet-4.5": (Decimal("3.10"), Decimal("15.50")),
|
|
||||||
"anthropic/claude-sonnet-4.6": (Decimal("3.10"), Decimal("15.50")),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -141,17 +140,15 @@ Now read the transcript provided in the user message and call `submit_analysis`.
|
||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Tool definition (OpenAI function-calling format)
|
# Tool definition (Anthropic tool-use format)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
_ANALYSIS_TOOL_OPENAI: dict[str, Any] = {
|
_ANALYSIS_TOOL: dict[str, Any] = {
|
||||||
"type": "function",
|
|
||||||
"function": {
|
|
||||||
"name": "submit_analysis",
|
"name": "submit_analysis",
|
||||||
"description": (
|
"description": (
|
||||||
"Submit the structured analysis of one Meet Kevin video. Call this exactly once."
|
"Submit the structured analysis of one Meet Kevin video. Call this exactly once."
|
||||||
),
|
),
|
||||||
"parameters": {
|
"input_schema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
"market_outlook_direction",
|
"market_outlook_direction",
|
||||||
|
|
@ -239,7 +236,6 @@ _ANALYSIS_TOOL_OPENAI: dict[str, Any] = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -296,15 +292,15 @@ _MAX_SEGMENTS = 1000
|
||||||
|
|
||||||
|
|
||||||
class LlmAnalyzer:
|
class LlmAnalyzer:
|
||||||
"""Calls Claude (via OpenRouter) to extract structured analysis from a video transcript.
|
"""Calls Claude (via native Anthropic SDK) to extract structured analysis from a video transcript.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
client: Configured AsyncOpenAI client pointed at OpenRouter.
|
client: Configured AsyncAnthropic client with OAuth bearer token.
|
||||||
model: Model identifier (e.g. "anthropic/claude-sonnet-4.5").
|
model: Model identifier (e.g. "claude-sonnet-4-5").
|
||||||
prompt_version: Prompt version string stored in kevin_analyses.
|
prompt_version: Prompt version string stored in kevin_analyses.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, client: AsyncOpenAI, model: str, prompt_version: str) -> None:
|
def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None:
|
||||||
self._client = client
|
self._client = client
|
||||||
self._model = model
|
self._model = model
|
||||||
self._prompt_version = prompt_version
|
self._prompt_version = prompt_version
|
||||||
|
|
@ -331,8 +327,8 @@ class LlmAnalyzer:
|
||||||
LlmCallResult with parsed MeetKevinAnalysis and token accounting.
|
LlmCallResult with parsed MeetKevinAnalysis and token accounting.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If the response contains no tool_calls.
|
ValueError: If the response contains no tool_use block.
|
||||||
pydantic.ValidationError: If function arguments fail schema validation.
|
pydantic.ValidationError: If tool input fails schema validation.
|
||||||
"""
|
"""
|
||||||
user_msg = self._build_user_message(
|
user_msg = self._build_user_message(
|
||||||
title=title,
|
title=title,
|
||||||
|
|
@ -342,34 +338,39 @@ class LlmAnalyzer:
|
||||||
transcript_segments=transcript_segments,
|
transcript_segments=transcript_segments,
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await self._client.chat.completions.create(
|
response = await self._client.messages.create(
|
||||||
model=self._model,
|
model=self._model,
|
||||||
max_tokens=4096,
|
max_tokens=4096,
|
||||||
messages=[
|
system=[
|
||||||
{"role": "system", "content": SYSTEM_PROMPT},
|
{"type": "text", "text": SYSTEM_PROMPT, "cache_control": {"type": "ephemeral"}}
|
||||||
{"role": "user", "content": user_msg},
|
|
||||||
],
|
],
|
||||||
tools=[_ANALYSIS_TOOL_OPENAI],
|
tools=[_ANALYSIS_TOOL],
|
||||||
tool_choice={"type": "function", "function": {"name": "submit_analysis"}},
|
tool_choice={"type": "tool", "name": "submit_analysis"},
|
||||||
|
messages=[{"role": "user", "content": user_msg}],
|
||||||
)
|
)
|
||||||
|
|
||||||
message = response.choices[0].message
|
# Find the first tool_use block in the response
|
||||||
if not message.tool_calls:
|
tool_use_block = None
|
||||||
|
for block in response.content:
|
||||||
|
if block.type == "tool_use":
|
||||||
|
tool_use_block = block
|
||||||
|
break
|
||||||
|
|
||||||
|
if tool_use_block is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"LLM response contained no tool_calls (expected submit_analysis function call)"
|
"LLM response contained no tool_use block (expected submit_analysis call)"
|
||||||
)
|
)
|
||||||
|
|
||||||
tool_call = message.tool_calls[0]
|
tool_input: dict = tool_use_block.input
|
||||||
tool_input = json.loads(tool_call.function.arguments)
|
|
||||||
analysis = MeetKevinAnalysis.model_validate(tool_input)
|
analysis = MeetKevinAnalysis.model_validate(tool_input)
|
||||||
|
|
||||||
prompt_tokens: int = response.usage.prompt_tokens
|
prompt_tokens: int = response.usage.input_tokens
|
||||||
completion_tokens: int = response.usage.completion_tokens
|
completion_tokens: int = response.usage.output_tokens
|
||||||
cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)
|
cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)
|
||||||
|
|
||||||
raw_response: dict = {
|
raw_response: dict = {
|
||||||
"finish_reason": response.choices[0].finish_reason,
|
"stop_reason": response.stop_reason,
|
||||||
"tool_name": tool_call.function.name,
|
"tool_name": tool_use_block.name,
|
||||||
"tool_input": tool_input,
|
"tool_input": tool_input,
|
||||||
"usage": {
|
"usage": {
|
||||||
"input_tokens": prompt_tokens,
|
"input_tokens": prompt_tokens,
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from datetime import timezone
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from openai import AsyncOpenAI
|
from anthropic import AsyncAnthropic
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||||
|
|
||||||
|
|
@ -179,14 +179,9 @@ async def run() -> None:
|
||||||
# Database
|
# Database
|
||||||
engine, session_factory = create_db(config)
|
engine, session_factory = create_db(config)
|
||||||
|
|
||||||
# OpenRouter client + LLM analyzer
|
# Anthropic client + LLM analyzer (OAuth bearer token)
|
||||||
client = AsyncOpenAI(
|
client = AsyncAnthropic(
|
||||||
api_key=config.openrouter_api_key,
|
auth_token=config.anthropic_oauth_token,
|
||||||
base_url="https://openrouter.ai/api/v1",
|
|
||||||
default_headers={
|
|
||||||
"HTTP-Referer": "https://trading.viktorbarzin.me",
|
|
||||||
"X-Title": "trading-bot meet-kevin",
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
analyzer = LlmAnalyzer(
|
analyzer = LlmAnalyzer(
|
||||||
client=client,
|
client=client,
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
"""Tests for the OpenRouter LLM analyzer (Task 7).
|
"""Tests for the Anthropic SDK LLM analyzer (Task 7).
|
||||||
|
|
||||||
Tests use MagicMock/AsyncMock to avoid real API calls.
|
Tests use MagicMock/AsyncMock to avoid real API calls.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
@ -28,23 +27,17 @@ from shared.schemas.meet_kevin import (
|
||||||
# Test helpers
|
# Test helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800):
|
def _make_anthropic_response(tool_input: dict, in_tokens: int = 5000, out_tokens: int = 800):
|
||||||
"""Mock an OpenAI ChatCompletion response with one tool_call."""
|
"""Mock an Anthropic Messages response with one tool_use block."""
|
||||||
tool_call = MagicMock()
|
block = MagicMock()
|
||||||
tool_call.function = MagicMock()
|
block.type = "tool_use"
|
||||||
tool_call.function.name = "submit_analysis"
|
block.name = "submit_analysis"
|
||||||
tool_call.function.arguments = json.dumps(tool_args)
|
block.input = tool_input
|
||||||
|
|
||||||
msg = MagicMock()
|
|
||||||
msg.tool_calls = [tool_call]
|
|
||||||
|
|
||||||
choice = MagicMock()
|
|
||||||
choice.message = msg
|
|
||||||
choice.finish_reason = "tool_calls"
|
|
||||||
|
|
||||||
resp = MagicMock()
|
resp = MagicMock()
|
||||||
resp.choices = [choice]
|
resp.content = [block]
|
||||||
resp.usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens)
|
resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
|
||||||
|
resp.stop_reason = "tool_use"
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -70,15 +63,13 @@ def _valid_analysis_input() -> dict:
|
||||||
|
|
||||||
|
|
||||||
def _make_client(response=None):
|
def _make_client(response=None):
|
||||||
"""Return a mocked AsyncOpenAI client with chat.completions.create wired up."""
|
"""Return a mocked AsyncAnthropic client with messages.create wired up."""
|
||||||
mock_create = AsyncMock(return_value=response)
|
mock_create = AsyncMock(return_value=response)
|
||||||
mock_completions = MagicMock()
|
mock_messages = MagicMock()
|
||||||
mock_completions.create = mock_create
|
mock_messages.create = mock_create
|
||||||
mock_chat = MagicMock()
|
|
||||||
mock_chat.completions = mock_completions
|
|
||||||
|
|
||||||
client = MagicMock()
|
client = MagicMock()
|
||||||
client.chat = mock_chat
|
client.messages = mock_messages
|
||||||
return client, mock_create
|
return client, mock_create
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -90,16 +81,16 @@ def _make_client(response=None):
|
||||||
class TestComputeCostUsd:
|
class TestComputeCostUsd:
|
||||||
"""Verify monetary cost calculations using Decimal arithmetic."""
|
"""Verify monetary cost calculations using Decimal arithmetic."""
|
||||||
|
|
||||||
def test_sonnet_45_openrouter_pricing(self):
|
def test_sonnet_45_native_pricing(self):
|
||||||
"""anthropic/claude-sonnet-4.5: $3.10/M input + $15.50/M output."""
|
"""claude-sonnet-4-5: $3/M input + $15/M output = $18/M total."""
|
||||||
# 1M input + 1M output = $3.10 + $15.50 = $18.60
|
# 1M input + 1M output = $3 + $15 = $18
|
||||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 1_000_000, 1_000_000)
|
result = compute_cost_usd("claude-sonnet-4-5", 1_000_000, 1_000_000)
|
||||||
assert result == Decimal("18.6000")
|
assert result == Decimal("18.0000")
|
||||||
|
|
||||||
def test_sonnet_46_legacy_slug(self):
|
def test_sonnet_46_native_pricing(self):
|
||||||
"""claude-sonnet-4-6 (legacy slug) is also priced at $3.10/$15.50."""
|
"""claude-sonnet-4-6: same pricing as 4-5 ($3/$15)."""
|
||||||
result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000)
|
result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000)
|
||||||
assert result == Decimal("18.6000")
|
assert result == Decimal("18.0000")
|
||||||
|
|
||||||
def test_opus_47_pricing(self):
|
def test_opus_47_pricing(self):
|
||||||
"""claude-opus-4-7: $15/M input + $75/M output."""
|
"""claude-opus-4-7: $15/M input + $75/M output."""
|
||||||
|
|
@ -118,21 +109,21 @@ class TestComputeCostUsd:
|
||||||
|
|
||||||
def test_zero_tokens(self):
|
def test_zero_tokens(self):
|
||||||
"""Zero tokens produce zero cost."""
|
"""Zero tokens produce zero cost."""
|
||||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0)
|
result = compute_cost_usd("claude-sonnet-4-5", 0, 0)
|
||||||
assert result == Decimal("0")
|
assert result == Decimal("0")
|
||||||
|
|
||||||
def test_result_is_decimal(self):
|
def test_result_is_decimal(self):
|
||||||
"""Return type is always Decimal, not float."""
|
"""Return type is always Decimal, not float."""
|
||||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800)
|
result = compute_cost_usd("claude-sonnet-4-5", 5000, 800)
|
||||||
assert isinstance(result, Decimal)
|
assert isinstance(result, Decimal)
|
||||||
|
|
||||||
def test_small_realistic_call(self):
|
def test_small_realistic_call(self):
|
||||||
"""Realistic 10K input + 1K output token call (Sonnet 4.5 via OpenRouter)."""
|
"""Realistic 10K input + 1K output token call (Sonnet 4.5 native)."""
|
||||||
# input: 10000/1_000_000 * 3.10 = 0.03100
|
# input: 10000/1_000_000 * 3 = 0.03000
|
||||||
# output: 1000/1_000_000 * 15.50 = 0.01550
|
# output: 1000/1_000_000 * 15 = 0.01500
|
||||||
# total: 0.04650
|
# total: 0.04500
|
||||||
result = compute_cost_usd("anthropic/claude-sonnet-4.5", 10_000, 1_000)
|
result = compute_cost_usd("claude-sonnet-4-5", 10_000, 1_000)
|
||||||
assert result == Decimal("0.0465")
|
assert result == Decimal("0.0450")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -178,7 +169,7 @@ class TestLlmCallResult:
|
||||||
analysis = MeetKevinAnalysis(**_valid_analysis_input())
|
analysis = MeetKevinAnalysis(**_valid_analysis_input())
|
||||||
result = LlmCallResult(
|
result = LlmCallResult(
|
||||||
analysis=analysis,
|
analysis=analysis,
|
||||||
raw_response={"finish_reason": "tool_calls"},
|
raw_response={"stop_reason": "tool_use"},
|
||||||
prompt_tokens=5000,
|
prompt_tokens=5000,
|
||||||
completion_tokens=800,
|
completion_tokens=800,
|
||||||
cost_usd=Decimal("0.027"),
|
cost_usd=Decimal("0.027"),
|
||||||
|
|
@ -192,13 +183,13 @@ class TestLlmCallResult:
|
||||||
cost = Decimal("0.027")
|
cost = Decimal("0.027")
|
||||||
result = LlmCallResult(
|
result = LlmCallResult(
|
||||||
analysis=analysis,
|
analysis=analysis,
|
||||||
raw_response={"finish_reason": "tool_calls"},
|
raw_response={"stop_reason": "tool_use"},
|
||||||
prompt_tokens=5000,
|
prompt_tokens=5000,
|
||||||
completion_tokens=800,
|
completion_tokens=800,
|
||||||
cost_usd=cost,
|
cost_usd=cost,
|
||||||
)
|
)
|
||||||
assert result.analysis is analysis
|
assert result.analysis is analysis
|
||||||
assert result.raw_response == {"finish_reason": "tool_calls"}
|
assert result.raw_response == {"stop_reason": "tool_use"}
|
||||||
assert result.prompt_tokens == 5000
|
assert result.prompt_tokens == 5000
|
||||||
assert result.completion_tokens == 800
|
assert result.completion_tokens == 800
|
||||||
assert result.cost_usd == cost
|
assert result.cost_usd == cost
|
||||||
|
|
@ -216,10 +207,10 @@ class TestLlmAnalyzerHappyPath:
|
||||||
async def test_returns_llm_call_result(self):
|
async def test_returns_llm_call_result(self):
|
||||||
"""analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
|
"""analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
|
||||||
tool_input = _valid_analysis_input()
|
tool_input = _valid_analysis_input()
|
||||||
resp = _make_openai_response(tool_input, in_tokens=5000, out_tokens=800)
|
resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800)
|
||||||
client, mock_create = _make_client(resp)
|
client, mock_create = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
result = await analyzer.analyze(
|
result = await analyzer.analyze(
|
||||||
title="Market Update",
|
title="Market Update",
|
||||||
description="Kevin covers the latest market trends.",
|
description="Kevin covers the latest market trends.",
|
||||||
|
|
@ -240,10 +231,10 @@ class TestLlmAnalyzerHappyPath:
|
||||||
async def test_analysis_fields_parsed_correctly(self):
|
async def test_analysis_fields_parsed_correctly(self):
|
||||||
"""Parsed MeetKevinAnalysis has correct field values from tool input."""
|
"""Parsed MeetKevinAnalysis has correct field values from tool input."""
|
||||||
tool_input = _valid_analysis_input()
|
tool_input = _valid_analysis_input()
|
||||||
resp = _make_openai_response(tool_input)
|
resp = _make_anthropic_response(tool_input)
|
||||||
client, _ = _make_client(resp)
|
client, _ = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
result = await analyzer.analyze(
|
result = await analyzer.analyze(
|
||||||
title="Test Video",
|
title="Test Video",
|
||||||
description="Description",
|
description="Description",
|
||||||
|
|
@ -265,10 +256,10 @@ class TestLlmAnalyzerHappyPath:
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_cost_usd_is_positive(self):
|
async def test_cost_usd_is_positive(self):
|
||||||
"""cost_usd is calculated and positive for a valid token count."""
|
"""cost_usd is calculated and positive for a valid token count."""
|
||||||
resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
|
resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
|
||||||
client, _ = _make_client(resp)
|
client, _ = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
result = await analyzer.analyze(
|
result = await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -281,11 +272,11 @@ class TestLlmAnalyzerHappyPath:
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_called_with_tool_choice_forcing(self):
|
async def test_api_called_with_tool_choice_forcing(self):
|
||||||
"""chat.completions.create is called with tool_choice forcing submit_analysis."""
|
"""messages.create is called with tool_choice forcing submit_analysis."""
|
||||||
resp = _make_openai_response(_valid_analysis_input())
|
resp = _make_anthropic_response(_valid_analysis_input())
|
||||||
client, mock_create = _make_client(resp)
|
client, mock_create = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -296,15 +287,15 @@ class TestLlmAnalyzerHappyPath:
|
||||||
|
|
||||||
mock_create.assert_called_once()
|
mock_create.assert_called_once()
|
||||||
kwargs = mock_create.call_args.kwargs
|
kwargs = mock_create.call_args.kwargs
|
||||||
assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}}
|
assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_called_with_system_prompt_in_messages(self):
|
async def test_api_called_with_system_prompt_in_system_param(self):
|
||||||
"""System prompt is passed as a system role message in the messages list."""
|
"""System prompt is passed as the system parameter (list with cache_control)."""
|
||||||
resp = _make_openai_response(_valid_analysis_input())
|
resp = _make_anthropic_response(_valid_analysis_input())
|
||||||
client, mock_create = _make_client(resp)
|
client, mock_create = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -314,18 +305,19 @@ class TestLlmAnalyzerHappyPath:
|
||||||
)
|
)
|
||||||
|
|
||||||
kwargs = mock_create.call_args.kwargs
|
kwargs = mock_create.call_args.kwargs
|
||||||
messages = kwargs["messages"]
|
system = kwargs["system"]
|
||||||
assert isinstance(messages, list)
|
assert isinstance(system, list)
|
||||||
assert messages[0]["role"] == "system"
|
assert system[0]["type"] == "text"
|
||||||
assert SYSTEM_PROMPT in messages[0]["content"]
|
assert SYSTEM_PROMPT in system[0]["text"]
|
||||||
|
assert system[0]["cache_control"] == {"type": "ephemeral"}
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_called_with_correct_model(self):
|
async def test_api_called_with_correct_model(self):
|
||||||
"""chat.completions.create is called with the model passed to LlmAnalyzer."""
|
"""messages.create is called with the model passed to LlmAnalyzer."""
|
||||||
resp = _make_openai_response(_valid_analysis_input())
|
resp = _make_anthropic_response(_valid_analysis_input())
|
||||||
client, mock_create = _make_client(resp)
|
client, mock_create = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -335,15 +327,15 @@ class TestLlmAnalyzerHappyPath:
|
||||||
)
|
)
|
||||||
|
|
||||||
kwargs = mock_create.call_args.kwargs
|
kwargs = mock_create.call_args.kwargs
|
||||||
assert kwargs["model"] == "anthropic/claude-sonnet-4.5"
|
assert kwargs["model"] == "claude-sonnet-4-5"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_called_with_submit_analysis_tool(self):
|
async def test_api_called_with_submit_analysis_tool(self):
|
||||||
"""Tool definition includes function name 'submit_analysis'."""
|
"""Tool definition includes name 'submit_analysis' with input_schema."""
|
||||||
resp = _make_openai_response(_valid_analysis_input())
|
resp = _make_anthropic_response(_valid_analysis_input())
|
||||||
client, mock_create = _make_client(resp)
|
client, mock_create = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -355,17 +347,17 @@ class TestLlmAnalyzerHappyPath:
|
||||||
kwargs = mock_create.call_args.kwargs
|
kwargs = mock_create.call_args.kwargs
|
||||||
tools = kwargs["tools"]
|
tools = kwargs["tools"]
|
||||||
assert any(
|
assert any(
|
||||||
t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis"
|
t.get("name") == "submit_analysis" and "input_schema" in t
|
||||||
for t in tools
|
for t in tools
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_raw_response_is_captured(self):
|
async def test_raw_response_is_captured(self):
|
||||||
"""raw_response in LlmCallResult holds serializable dict."""
|
"""raw_response in LlmCallResult holds serializable dict."""
|
||||||
resp = _make_openai_response(_valid_analysis_input())
|
resp = _make_anthropic_response(_valid_analysis_input())
|
||||||
client, _ = _make_client(resp)
|
client, _ = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
result = await analyzer.analyze(
|
result = await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -379,7 +371,7 @@ class TestLlmAnalyzerHappyPath:
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcript_segments_included_in_user_message(self):
|
async def test_transcript_segments_included_in_user_message(self):
|
||||||
"""User message contains timestamped segment lines from transcript_segments."""
|
"""User message contains timestamped segment lines from transcript_segments."""
|
||||||
resp = _make_openai_response(_valid_analysis_input())
|
resp = _make_anthropic_response(_valid_analysis_input())
|
||||||
client, mock_create = _make_client(resp)
|
client, mock_create = _make_client(resp)
|
||||||
|
|
||||||
segments = [
|
segments = [
|
||||||
|
|
@ -387,7 +379,7 @@ class TestLlmAnalyzerHappyPath:
|
||||||
{"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
|
{"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
|
||||||
]
|
]
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -397,8 +389,8 @@ class TestLlmAnalyzerHappyPath:
|
||||||
)
|
)
|
||||||
|
|
||||||
kwargs = mock_create.call_args.kwargs
|
kwargs = mock_create.call_args.kwargs
|
||||||
# user message is the second entry in messages list
|
# user message is in the messages list
|
||||||
user_content = kwargs["messages"][1]["content"]
|
user_content = kwargs["messages"][0]["content"]
|
||||||
assert "Hello world." in user_content
|
assert "Hello world." in user_content
|
||||||
assert "Let's talk stocks." in user_content
|
assert "Let's talk stocks." in user_content
|
||||||
|
|
||||||
|
|
@ -412,23 +404,17 @@ class TestLlmAnalyzerFailurePaths:
|
||||||
"""Failure path tests."""
|
"""Failure path tests."""
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_no_tool_calls_raises_value_error(self):
|
async def test_no_tool_use_block_raises_value_error(self):
|
||||||
"""If response message has no tool_calls, raises ValueError."""
|
"""If response has no tool_use block, raises ValueError containing 'tool_use'."""
|
||||||
msg = MagicMock()
|
|
||||||
msg.tool_calls = None
|
|
||||||
|
|
||||||
choice = MagicMock()
|
|
||||||
choice.message = msg
|
|
||||||
choice.finish_reason = "stop"
|
|
||||||
|
|
||||||
resp = MagicMock()
|
resp = MagicMock()
|
||||||
resp.choices = [choice]
|
resp.content = [MagicMock(type="text")]
|
||||||
resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
|
resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
|
||||||
|
resp.stop_reason = "end_turn"
|
||||||
|
|
||||||
client, _ = _make_client(resp)
|
client, _ = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError, match="tool_use"):
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
description="",
|
description="",
|
||||||
|
|
@ -438,22 +424,16 @@ class TestLlmAnalyzerFailurePaths:
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_empty_tool_calls_raises_value_error(self):
|
async def test_empty_content_raises_value_error(self):
|
||||||
"""If response message has empty tool_calls list, raises ValueError."""
|
"""If response.content is empty, raises ValueError."""
|
||||||
msg = MagicMock()
|
|
||||||
msg.tool_calls = []
|
|
||||||
|
|
||||||
choice = MagicMock()
|
|
||||||
choice.message = msg
|
|
||||||
choice.finish_reason = "stop"
|
|
||||||
|
|
||||||
resp = MagicMock()
|
resp = MagicMock()
|
||||||
resp.choices = [choice]
|
resp.content = []
|
||||||
resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
|
resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
|
||||||
|
resp.stop_reason = "end_turn"
|
||||||
|
|
||||||
client, _ = _make_client(resp)
|
client, _ = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
|
|
@ -469,10 +449,10 @@ class TestLlmAnalyzerFailurePaths:
|
||||||
bad_input = _valid_analysis_input()
|
bad_input = _valid_analysis_input()
|
||||||
bad_input["market_outlook_direction"] = "extremely_bullish" # not a valid enum
|
bad_input["market_outlook_direction"] = "extremely_bullish" # not a valid enum
|
||||||
|
|
||||||
resp = _make_openai_response(bad_input)
|
resp = _make_anthropic_response(bad_input)
|
||||||
client, _ = _make_client(resp)
|
client, _ = _make_client(resp)
|
||||||
|
|
||||||
analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
|
analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-5", prompt_version="v1")
|
||||||
with pytest.raises(Exception): # pydantic ValidationError or ValueError
|
with pytest.raises(Exception): # pydantic ValidationError or ValueError
|
||||||
await analyzer.analyze(
|
await analyzer.analyze(
|
||||||
title="Test",
|
title="Test",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue