"""Tests for the Anthropic SDK LLM analyzer (Task 7). Tests use MagicMock/AsyncMock to avoid real API calls. """ from datetime import datetime, timezone from decimal import Decimal from unittest.mock import AsyncMock, MagicMock import pytest from services.meet_kevin_watcher.llm_analyzer import ( SYSTEM_PROMPT, LlmAnalyzer, LlmCallResult, compute_cost_usd, ) from shared.schemas.meet_kevin import ( MarketOutlook, MeetKevinAnalysis, TickerAction, TimeHorizon, ) # --------------------------------------------------------------------------- # Test helpers # --------------------------------------------------------------------------- def _make_anthropic_response(tool_input: dict, in_tokens: int = 5000, out_tokens: int = 800): """Mock an Anthropic Messages response with one tool_use block.""" block = MagicMock() block.type = "tool_use" block.name = "submit_analysis" block.input = tool_input resp = MagicMock() resp.content = [block] resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens) resp.stop_reason = "tool_use" return resp def _valid_analysis_input() -> dict: """Return a dict that Pydantic can validate into MeetKevinAnalysis.""" return { "market_outlook_direction": "bullish", "market_outlook_reasoning": "Strong earnings and low unemployment.", "macro_themes": ["Fed pivot", "AI boom"], "key_risks": ["Inflation rebound", "Credit crunch"], "summary": "Kevin discussed the current bull market and highlighted several tech stocks.", "tickers": [ { "symbol": "NVDA", "action": "buy", "conviction": 0.85, "time_horizon": "months", "rationale_quote": "AI infrastructure buildout has years to run", "video_timestamp_seconds": 320, } ], } def _make_client(response=None): """Return a mocked AsyncAnthropic client with messages.create wired up.""" mock_create = AsyncMock(return_value=response) mock_messages = MagicMock() mock_messages.create = mock_create client = MagicMock() client.messages = mock_messages return client, mock_create # 
# ---------------------------------------------------------------------------
# compute_cost_usd
# ---------------------------------------------------------------------------


class TestComputeCostUsd:
    """Pricing checks for compute_cost_usd, compared as Decimal values."""

    def test_sonnet_45_native_pricing(self):
        """claude-sonnet-4-5 at 1M in + 1M out: $3 + $15 = $18."""
        expected = Decimal("18.0000")
        assert compute_cost_usd("claude-sonnet-4-5", 1_000_000, 1_000_000) == expected

    def test_sonnet_46_native_pricing(self):
        """claude-sonnet-4-6 is priced like 4-5 ($3/$15 per million)."""
        expected = Decimal("18.0000")
        assert compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000) == expected

    def test_opus_47_pricing(self):
        """claude-opus-4-7 at 1M in + 1M out: $15 + $75 = $90."""
        expected = Decimal("90.0000")
        assert compute_cost_usd("claude-opus-4-7", 1_000_000, 1_000_000) == expected

    def test_haiku_45_pricing(self):
        """claude-haiku-4-5-20251001 at 1M in + 1M out: $1 + $5 = $6."""
        expected = Decimal("6.0000")
        assert compute_cost_usd("claude-haiku-4-5-20251001", 1_000_000, 1_000_000) == expected

    def test_unknown_model_returns_zero(self):
        """A model name with no known pricing yields Decimal('0')."""
        cost = compute_cost_usd("unknown-model", 1000, 1000)
        assert cost == Decimal("0")

    def test_zero_tokens(self):
        """No tokens consumed means no cost."""
        cost = compute_cost_usd("claude-sonnet-4-5", 0, 0)
        assert cost == Decimal("0")

    def test_result_is_decimal(self):
        """The returned cost is a Decimal instance rather than a float."""
        cost = compute_cost_usd("claude-sonnet-4-5", 5000, 800)
        assert isinstance(cost, Decimal)

    def test_small_realistic_call(self):
        """Realistic 10K input + 1K output token call (Sonnet 4.5 native)."""
        # input:  10_000 / 1_000_000 * 3  = 0.03000
        # output:  1_000 / 1_000_000 * 15 = 0.01500
        # total:                            0.04500
        expected = Decimal("0.0450")
        assert compute_cost_usd("claude-sonnet-4-5", 10_000, 1_000) == expected
# ---------------------------------------------------------------------------
# SYSTEM_PROMPT
# ---------------------------------------------------------------------------


class TestSystemPrompt:
    """Verify the system prompt has the required content markers."""

    def test_contains_submit_analysis(self):
        """SYSTEM_PROMPT must reference the tool name 'submit_analysis'."""
        assert "submit_analysis" in SYSTEM_PROMPT

    def test_contains_ticker(self):
        """SYSTEM_PROMPT must mention 'ticker' (case-insensitive)."""
        assert "ticker" in SYSTEM_PROMPT.lower()

    def test_is_substantial(self):
        """SYSTEM_PROMPT should be at least 300 whitespace-separated words."""
        word_count = len(SYSTEM_PROMPT.split())
        assert word_count >= 300, f"SYSTEM_PROMPT is only {word_count} words"

    def test_mentions_conviction(self):
        """SYSTEM_PROMPT should mention conviction scoring."""
        assert "conviction" in SYSTEM_PROMPT.lower()

    def test_mentions_time_horizon(self):
        """SYSTEM_PROMPT should mention 'time_horizon' or 'time horizon'."""
        assert "time_horizon" in SYSTEM_PROMPT or "time horizon" in SYSTEM_PROMPT.lower()


# ---------------------------------------------------------------------------
# LlmCallResult dataclass
# ---------------------------------------------------------------------------


class TestLlmCallResult:
    """Verify LlmCallResult shape and immutability."""

    def test_is_frozen(self):
        """Assigning to a field of an LlmCallResult must raise."""
        analysis = MeetKevinAnalysis(**_valid_analysis_input())
        result = LlmCallResult(
            analysis=analysis,
            raw_response={"stop_reason": "tool_use"},
            prompt_tokens=5000,
            completion_tokens=800,
            cost_usd=Decimal("0.027"),
        )
        with pytest.raises((AttributeError, TypeError)):
            result.prompt_tokens = 9999  # type: ignore

    def test_fields_accessible(self):
        """All five fields are accessible on LlmCallResult."""
        analysis = MeetKevinAnalysis(**_valid_analysis_input())
        cost = Decimal("0.027")
        result = LlmCallResult(
            analysis=analysis,
            raw_response={"stop_reason": "tool_use"},
            prompt_tokens=5000,
            completion_tokens=800,
            cost_usd=cost,
        )
        assert result.analysis is analysis
        assert result.raw_response == {"stop_reason": "tool_use"}
        assert result.prompt_tokens == 5000
        assert result.completion_tokens == 800
        assert result.cost_usd == cost


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — shared call helpers
# ---------------------------------------------------------------------------


def _make_analyzer(client, model: str = "claude-sonnet-4-5") -> LlmAnalyzer:
    """Build an LlmAnalyzer around a mocked client with the default test config."""
    return LlmAnalyzer(client=client, model=model, prompt_version="v1")


async def _analyze_minimal(analyzer: LlmAnalyzer) -> LlmCallResult:
    """Call analyze() with the smallest input set the tests rely on.

    Used by tests that only inspect the outgoing request or the error raised,
    where the video metadata and transcript content are irrelevant.
    """
    return await analyzer.analyze(
        title="Test",
        description="",
        published_at=datetime(2026, 5, 21, tzinfo=timezone.utc),
        transcript_text="",
        transcript_segments=[],
    )


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — happy path
# ---------------------------------------------------------------------------


class TestLlmAnalyzerHappyPath:
    """Happy-path tests for the analyzer."""

    @pytest.mark.asyncio
    async def test_returns_llm_call_result(self):
        """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
        tool_input = _valid_analysis_input()
        resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800)
        client, _ = _make_client(resp)

        analyzer = _make_analyzer(client)
        result = await analyzer.analyze(
            title="Market Update",
            description="Kevin covers the latest market trends.",
            published_at=datetime(2026, 5, 21, 12, 0, 0, tzinfo=timezone.utc),
            transcript_text="Welcome to today's update. NVDA is looking strong.",
            transcript_segments=[
                {"start": 0.0, "end": 5.0, "text": "Welcome to today's update."},
                {"start": 5.0, "end": 10.0, "text": "NVDA is looking strong."},
            ],
        )

        assert isinstance(result, LlmCallResult)
        assert isinstance(result.analysis, MeetKevinAnalysis)
        # Token counts are taken from the mocked response's usage block.
        assert result.prompt_tokens == 5000
        assert result.completion_tokens == 800

    @pytest.mark.asyncio
    async def test_analysis_fields_parsed_correctly(self):
        """Parsed MeetKevinAnalysis has correct field values from tool input."""
        tool_input = _valid_analysis_input()
        resp = _make_anthropic_response(tool_input)
        client, _ = _make_client(resp)

        analyzer = _make_analyzer(client)
        result = await analyzer.analyze(
            title="Test Video",
            description="Description",
            published_at=datetime(2026, 5, 21, tzinfo=timezone.utc),
            transcript_text="Some transcript.",
            transcript_segments=[],
        )

        analysis = result.analysis
        assert analysis.market_outlook_direction == MarketOutlook.BULLISH
        assert analysis.market_outlook_reasoning == "Strong earnings and low unemployment."
        assert "Fed pivot" in analysis.macro_themes
        assert len(analysis.tickers) == 1
        ticker = analysis.tickers[0]
        assert ticker.symbol == "NVDA"
        assert ticker.action == TickerAction.BUY
        assert ticker.conviction == pytest.approx(0.85)
        assert ticker.time_horizon == TimeHorizon.MONTHS

    @pytest.mark.asyncio
    async def test_cost_usd_is_positive(self):
        """cost_usd is calculated and positive for a valid token count."""
        resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
        client, _ = _make_client(resp)

        result = await _analyze_minimal(_make_analyzer(client))

        assert result.cost_usd > Decimal("0")

    @pytest.mark.asyncio
    async def test_api_called_with_tool_choice_forcing(self):
        """messages.create is called with tool_choice forcing submit_analysis."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, mock_create = _make_client(resp)

        await _analyze_minimal(_make_analyzer(client))

        mock_create.assert_called_once()
        kwargs = mock_create.call_args.kwargs
        assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}

    @pytest.mark.asyncio
    async def test_api_called_with_system_prompt_in_system_param(self):
        """System prompt is passed as the system parameter (list with cache_control)."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, mock_create = _make_client(resp)

        await _analyze_minimal(_make_analyzer(client))

        kwargs = mock_create.call_args.kwargs
        system = kwargs["system"]
        assert isinstance(system, list)
        assert system[0]["type"] == "text"
        assert SYSTEM_PROMPT in system[0]["text"]
        assert system[0]["cache_control"] == {"type": "ephemeral"}

    @pytest.mark.asyncio
    async def test_api_called_with_correct_model(self):
        """messages.create is called with the model passed to LlmAnalyzer."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, mock_create = _make_client(resp)

        # Model is spelled out here because it is the value under test.
        await _analyze_minimal(_make_analyzer(client, model="claude-sonnet-4-5"))

        kwargs = mock_create.call_args.kwargs
        assert kwargs["model"] == "claude-sonnet-4-5"

    @pytest.mark.asyncio
    async def test_api_called_with_submit_analysis_tool(self):
        """Tool definition includes name 'submit_analysis' with input_schema."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, mock_create = _make_client(resp)

        await _analyze_minimal(_make_analyzer(client))

        kwargs = mock_create.call_args.kwargs
        tools = kwargs["tools"]
        assert any(
            t.get("name") == "submit_analysis" and "input_schema" in t
            for t in tools
        )

    @pytest.mark.asyncio
    async def test_raw_response_is_captured(self):
        """raw_response in LlmCallResult is exposed as a dict."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, _ = _make_client(resp)

        result = await _analyze_minimal(_make_analyzer(client))

        assert isinstance(result.raw_response, dict)

    @pytest.mark.asyncio
    async def test_transcript_segments_included_in_user_message(self):
        """User message contains the text of each entry in transcript_segments."""
        resp = _make_anthropic_response(_valid_analysis_input())
        client, mock_create = _make_client(resp)

        segments = [
            {"start": 0.0, "end": 5.0, "text": "Hello world."},
            {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
        ]
        analyzer = _make_analyzer(client)
        await analyzer.analyze(
            title="Test",
            description="",
            published_at=datetime(2026, 5, 21, tzinfo=timezone.utc),
            transcript_text="Hello world. Let's talk stocks.",
            transcript_segments=segments,
        )

        kwargs = mock_create.call_args.kwargs
        # The user message is the first entry of the messages list.
        user_content = kwargs["messages"][0]["content"]
        assert "Hello world." in user_content
        assert "Let's talk stocks." in user_content


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — failure paths
# ---------------------------------------------------------------------------


class TestLlmAnalyzerFailurePaths:
    """Failure path tests."""

    @pytest.mark.asyncio
    async def test_no_tool_use_block_raises_value_error(self):
        """If response has no tool_use block, raises ValueError containing 'tool_use'."""
        resp = MagicMock()
        resp.content = [MagicMock(type="text")]
        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
        resp.stop_reason = "end_turn"
        client, _ = _make_client(resp)

        analyzer = _make_analyzer(client)
        with pytest.raises(ValueError, match="tool_use"):
            await _analyze_minimal(analyzer)

    @pytest.mark.asyncio
    async def test_empty_content_raises_value_error(self):
        """If response.content is empty, raises ValueError."""
        resp = MagicMock()
        resp.content = []
        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
        resp.stop_reason = "end_turn"
        client, _ = _make_client(resp)

        analyzer = _make_analyzer(client)
        with pytest.raises(ValueError):
            await _analyze_minimal(analyzer)

    @pytest.mark.asyncio
    async def test_invalid_tool_input_raises_validation_error(self):
        """Malformed tool input (invalid enum) raises a validation error."""
        bad_input = _valid_analysis_input()
        bad_input["market_outlook_direction"] = "extremely_bullish"  # not a valid enum
        resp = _make_anthropic_response(bad_input)
        client, _ = _make_client(resp)

        analyzer = _make_analyzer(client)
        # pydantic's ValidationError subclasses ValueError, so ValueError covers
        # both accepted outcomes without also passing on unrelated failures
        # (e.g. an AttributeError/TypeError caused by broken mock wiring).
        with pytest.raises(ValueError):
            await _analyze_minimal(analyzer)