# trading/tests/services/meet_kevin_watcher/test_llm_analyzer.py

"""Tests for the OpenRouter LLM analyzer (Task 7).

Tests use MagicMock/AsyncMock to avoid real API calls.
"""

import json
from datetime import datetime, timezone
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock

import pytest

from services.meet_kevin_watcher.llm_analyzer import (
    SYSTEM_PROMPT,
    LlmAnalyzer,
    LlmCallResult,
    compute_cost_usd,
)
from shared.schemas.meet_kevin import (
    MarketOutlook,
    MeetKevinAnalysis,
    TickerAction,
    TimeHorizon,
)


# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------

def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800):
    """Build a fake chat-completion response holding one ``submit_analysis`` tool call.

    Args:
        tool_args: Payload that is JSON-encoded into the tool call's
            ``function.arguments`` string.
        in_tokens: Value exposed as ``usage.prompt_tokens``.
        out_tokens: Value exposed as ``usage.completion_tokens``.

    Returns:
        A ``MagicMock`` with a single entry in ``choices`` (finish reason
        ``"tool_calls"``) and a ``usage`` attribute carrying the token counts.
    """
    # ``name`` is a reserved keyword of the Mock constructor (it names the mock
    # itself), so the attribute has to be assigned after construction.
    function = MagicMock(arguments=json.dumps(tool_args))
    function.name = "submit_analysis"

    message = MagicMock(tool_calls=[MagicMock(function=function)])
    only_choice = MagicMock(message=message, finish_reason="tool_calls")
    usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens)
    return MagicMock(choices=[only_choice], usage=usage)


def _valid_analysis_input() -> dict:
    """Produce a fresh payload suitable for ``MeetKevinAnalysis(**payload)``.

    A new dict (with a new ``tickers`` list) is built on every call, so a test
    may mutate the result without affecting other tests.
    """
    nvda_call = {
        "symbol": "NVDA",
        "action": "buy",
        "conviction": 0.85,
        "time_horizon": "months",
        "rationale_quote": "AI infrastructure buildout has years to run",
        "video_timestamp_seconds": 320,
    }
    payload = {
        "market_outlook_direction": "bullish",
        "market_outlook_reasoning": "Strong earnings and low unemployment.",
        "macro_themes": ["Fed pivot", "AI boom"],
        "key_risks": ["Inflation rebound", "Credit crunch"],
        "summary": "Kevin discussed the current bull market and highlighted several tech stocks.",
        "tickers": [nvda_call],
    }
    return payload


def _make_client(response=None):
    """Create a stand-in AsyncOpenAI client plus its ``chat.completions.create`` stub.

    Args:
        response: Value the awaited ``create`` call resolves to.

    Returns:
        Tuple ``(client, create_stub)`` where ``client.chat.completions.create``
        is the returned ``AsyncMock``.
    """
    create_stub = AsyncMock(return_value=response)
    fake_client = MagicMock()
    # MagicMock creates the intermediate ``chat`` and ``completions`` children
    # on first access, so only the leaf needs to be replaced.
    fake_client.chat.completions.create = create_stub
    return fake_client, create_stub


# ---------------------------------------------------------------------------
# compute_cost_usd
# ---------------------------------------------------------------------------


class TestComputeCostUsd:
    """Pin the Decimal totals ``compute_cost_usd`` returns for each model slug."""

    def test_sonnet_45_openrouter_pricing(self):
        """anthropic/claude-sonnet-4.5 costs $3.10/M input plus $15.50/M output."""
        # One million tokens each way: 3.10 + 15.50 = 18.60 USD.
        million = 1_000_000
        cost = compute_cost_usd("anthropic/claude-sonnet-4.5", million, million)
        assert cost == Decimal("18.6000")

    def test_sonnet_46_legacy_slug(self):
        """The legacy slug claude-sonnet-4-6 yields the same $18.60 total."""
        million = 1_000_000
        cost = compute_cost_usd("claude-sonnet-4-6", million, million)
        assert cost == Decimal("18.6000")

    def test_opus_47_pricing(self):
        """claude-opus-4-7 totals $90 for 1M in + 1M out ($15/M + $75/M)."""
        million = 1_000_000
        cost = compute_cost_usd("claude-opus-4-7", million, million)
        assert cost == Decimal("90.0000")

    def test_haiku_45_pricing(self):
        """claude-haiku-4-5-20251001 totals $6 for 1M in + 1M out ($1/M + $5/M)."""
        million = 1_000_000
        cost = compute_cost_usd("claude-haiku-4-5-20251001", million, million)
        assert cost == Decimal("6.0000")

    def test_unknown_model_returns_zero(self):
        """An unrecognised slug yields Decimal('0'); any logged warning is not checked here."""
        cost = compute_cost_usd("unknown-model", 1000, 1000)
        assert cost == Decimal("0")

    def test_zero_tokens(self):
        """No tokens in either direction means no cost."""
        assert compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0) == Decimal("0")

    def test_result_is_decimal(self):
        """The return value is a Decimal instance rather than a float."""
        assert isinstance(compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800), Decimal)

    def test_small_realistic_call(self):
        """A 10K-input / 1K-output Sonnet 4.5 call costs $0.0465."""
        # input:  10_000 / 1_000_000 * 3.10  = 0.03100
        # output:  1_000 / 1_000_000 * 15.50 = 0.01550
        # sum:                                 0.04650
        prompt_tokens, completion_tokens = 10_000, 1_000
        cost = compute_cost_usd("anthropic/claude-sonnet-4.5", prompt_tokens, completion_tokens)
        assert cost == Decimal("0.0465")


# ---------------------------------------------------------------------------
# SYSTEM_PROMPT
# ---------------------------------------------------------------------------


class TestSystemPrompt:
    """Check that ``SYSTEM_PROMPT`` carries the markers the analyzer relies on."""

    def test_contains_submit_analysis(self):
        """The tool name 'submit_analysis' appears verbatim in the prompt."""
        tool_name = "submit_analysis"
        assert tool_name in SYSTEM_PROMPT

    def test_contains_ticker(self):
        """The word 'ticker' appears somewhere, in any letter case."""
        lowered = SYSTEM_PROMPT.lower()
        assert "ticker" in lowered

    def test_is_substantial(self):
        """The prompt has at least 300 whitespace-separated words."""
        words = SYSTEM_PROMPT.split()
        word_count = len(words)
        assert word_count >= 300, f"SYSTEM_PROMPT is only {word_count} words"

    def test_mentions_conviction(self):
        """The word 'conviction' appears somewhere, in any letter case."""
        lowered = SYSTEM_PROMPT.lower()
        assert "conviction" in lowered

    def test_mentions_time_horizon(self):
        """Either the field name 'time_horizon' or the phrase 'time horizon' is present."""
        lowered = SYSTEM_PROMPT.lower()
        # The field name is matched case-sensitively; the phrase is not.
        assert "time_horizon" in SYSTEM_PROMPT or "time horizon" in lowered


# ---------------------------------------------------------------------------
# LlmCallResult dataclass
# ---------------------------------------------------------------------------


class TestLlmCallResult:
    """Check the field layout and immutability of ``LlmCallResult``."""

    def test_is_frozen(self):
        """Assigning to a field after construction must be rejected."""
        frozen = LlmCallResult(
            analysis=MeetKevinAnalysis(**_valid_analysis_input()),
            raw_response={"finish_reason": "tool_calls"},
            prompt_tokens=5000,
            completion_tokens=800,
            cost_usd=Decimal("0.027"),
        )
        # Either exception type is accepted as proof of immutability.
        with pytest.raises((AttributeError, TypeError)):
            setattr(frozen, "prompt_tokens", 9999)

    def test_fields_accessible(self):
        """Each of the five constructor fields reads back unchanged."""
        expected_cost = Decimal("0.027")
        parsed = MeetKevinAnalysis(**_valid_analysis_input())
        built = LlmCallResult(
            analysis=parsed,
            raw_response={"finish_reason": "tool_calls"},
            prompt_tokens=5000,
            completion_tokens=800,
            cost_usd=expected_cost,
        )

        # The analysis object is stored by reference, not copied.
        assert built.analysis is parsed
        assert built.raw_response == {"finish_reason": "tool_calls"}
        assert built.prompt_tokens == 5000
        assert built.completion_tokens == 800
        assert built.cost_usd == expected_cost


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — happy path
# ---------------------------------------------------------------------------


class TestLlmAnalyzerHappyPath:
    """Happy-path tests for ``LlmAnalyzer.analyze`` against a mocked client."""

    @staticmethod
    async def _analyze(response, **overrides):
        """Run ``analyze`` once against a mocked client.

        Centralises the arrange/act steps every test in this class shares, so
        each test body shows only the inputs and assertions that are specific
        to it.

        Args:
            response: Object the mocked ``chat.completions.create`` resolves to.
            **overrides: Keyword arguments replacing the default ``analyze``
                inputs (title ``"Test"``, empty description and transcript,
                no segments, published 2026-05-21 UTC).

        Returns:
            Tuple ``(result, mock_create)``: the value returned by ``analyze``
            and the ``AsyncMock`` standing in for ``chat.completions.create``.
        """
        client, mock_create = _make_client(response)
        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
        call_kwargs = {
            "title": "Test",
            "description": "",
            "published_at": datetime(2026, 5, 21, tzinfo=timezone.utc),
            "transcript_text": "",
            "transcript_segments": [],
        }
        call_kwargs.update(overrides)
        result = await analyzer.analyze(**call_kwargs)
        return result, mock_create

    @pytest.mark.asyncio
    async def test_returns_llm_call_result(self):
        """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
        resp = _make_openai_response(_valid_analysis_input(), in_tokens=5000, out_tokens=800)

        result, _ = await self._analyze(
            resp,
            title="Market Update",
            description="Kevin covers the latest market trends.",
            published_at=datetime(2026, 5, 21, 12, 0, 0, tzinfo=timezone.utc),
            transcript_text="Welcome to today's update. NVDA is looking strong.",
            transcript_segments=[
                {"start": 0.0, "end": 5.0, "text": "Welcome to today's update."},
                {"start": 5.0, "end": 10.0, "text": "NVDA is looking strong."},
            ],
        )

        assert isinstance(result, LlmCallResult)
        assert isinstance(result.analysis, MeetKevinAnalysis)
        # Token counts are taken from the mocked response's usage block.
        assert result.prompt_tokens == 5000
        assert result.completion_tokens == 800

    @pytest.mark.asyncio
    async def test_analysis_fields_parsed_correctly(self):
        """Parsed MeetKevinAnalysis has correct field values from tool input."""
        resp = _make_openai_response(_valid_analysis_input())

        result, _ = await self._analyze(
            resp,
            title="Test Video",
            description="Description",
            transcript_text="Some transcript.",
        )

        analysis = result.analysis
        assert analysis.market_outlook_direction == MarketOutlook.BULLISH
        assert analysis.market_outlook_reasoning == "Strong earnings and low unemployment."
        assert "Fed pivot" in analysis.macro_themes
        assert len(analysis.tickers) == 1
        assert analysis.tickers[0].symbol == "NVDA"
        assert analysis.tickers[0].action == TickerAction.BUY
        assert analysis.tickers[0].conviction == pytest.approx(0.85)
        assert analysis.tickers[0].time_horizon == TimeHorizon.MONTHS

    @pytest.mark.asyncio
    async def test_cost_usd_is_positive(self):
        """cost_usd is calculated and positive for a valid token count."""
        resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)

        result, _ = await self._analyze(resp)

        assert result.cost_usd > Decimal("0")

    @pytest.mark.asyncio
    async def test_api_called_with_tool_choice_forcing(self):
        """chat.completions.create is called with tool_choice forcing submit_analysis."""
        resp = _make_openai_response(_valid_analysis_input())

        _, mock_create = await self._analyze(resp)

        mock_create.assert_called_once()
        kwargs = mock_create.call_args.kwargs
        assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}}

    @pytest.mark.asyncio
    async def test_api_called_with_system_prompt_in_messages(self):
        """System prompt is passed as a system role message in the messages list."""
        resp = _make_openai_response(_valid_analysis_input())

        _, mock_create = await self._analyze(resp)

        messages = mock_create.call_args.kwargs["messages"]
        assert isinstance(messages, list)
        assert messages[0]["role"] == "system"
        assert SYSTEM_PROMPT in messages[0]["content"]

    @pytest.mark.asyncio
    async def test_api_called_with_correct_model(self):
        """chat.completions.create is called with the model passed to LlmAnalyzer."""
        resp = _make_openai_response(_valid_analysis_input())

        _, mock_create = await self._analyze(resp)

        assert mock_create.call_args.kwargs["model"] == "anthropic/claude-sonnet-4.5"

    @pytest.mark.asyncio
    async def test_api_called_with_submit_analysis_tool(self):
        """Tool definition includes function name 'submit_analysis'."""
        resp = _make_openai_response(_valid_analysis_input())

        _, mock_create = await self._analyze(resp)

        tools = mock_create.call_args.kwargs["tools"]
        assert any(
            t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis"
            for t in tools
        )

    @pytest.mark.asyncio
    async def test_raw_response_is_captured(self):
        """raw_response in LlmCallResult is a dict (its contents are not inspected here)."""
        resp = _make_openai_response(_valid_analysis_input())

        result, _ = await self._analyze(resp)

        assert isinstance(result.raw_response, dict)

    @pytest.mark.asyncio
    async def test_transcript_segments_included_in_user_message(self):
        """User message contains the text of every entry in transcript_segments."""
        resp = _make_openai_response(_valid_analysis_input())
        segments = [
            {"start": 0.0, "end": 5.0, "text": "Hello world."},
            {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
        ]

        _, mock_create = await self._analyze(
            resp,
            transcript_text="Hello world. Let's talk stocks.",
            transcript_segments=segments,
        )

        # The user message is expected to be the second entry, after the system prompt.
        user_content = mock_create.call_args.kwargs["messages"][1]["content"]
        assert "Hello world." in user_content
        assert "Let's talk stocks." in user_content


# ---------------------------------------------------------------------------
# LlmAnalyzer.analyze — failure paths
# ---------------------------------------------------------------------------


class TestLlmAnalyzerFailurePaths:
    """Failure-path tests for ``LlmAnalyzer.analyze`` against a mocked client."""

    @staticmethod
    def _response_with_tool_calls(tool_calls):
        """Build a mocked response whose only choice exposes ``tool_calls`` as given.

        Args:
            tool_calls: Value assigned verbatim to ``message.tool_calls``
                (for example ``None`` or ``[]``).

        Returns:
            A ``MagicMock`` response with finish reason ``"stop"`` and a usage
            block of 5000 prompt / 800 completion tokens.
        """
        msg = MagicMock()
        msg.tool_calls = tool_calls

        choice = MagicMock()
        choice.message = msg
        choice.finish_reason = "stop"

        resp = MagicMock()
        resp.choices = [choice]
        resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
        return resp

    @staticmethod
    async def _analyze(response):
        """Call ``analyze`` with minimal inputs against a client returning ``response``."""
        client, _ = _make_client(response)
        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
        return await analyzer.analyze(
            title="Test",
            description="",
            published_at=datetime(2026, 5, 21, tzinfo=timezone.utc),
            transcript_text="",
            transcript_segments=[],
        )

    @pytest.mark.asyncio
    async def test_no_tool_calls_raises_value_error(self):
        """If response message has no tool_calls, raises ValueError."""
        resp = self._response_with_tool_calls(None)

        with pytest.raises(ValueError):
            await self._analyze(resp)

    @pytest.mark.asyncio
    async def test_empty_tool_calls_raises_value_error(self):
        """If response message has empty tool_calls list, raises ValueError."""
        resp = self._response_with_tool_calls([])

        with pytest.raises(ValueError):
            await self._analyze(resp)

    @pytest.mark.asyncio
    async def test_invalid_tool_input_raises_validation_error(self):
        """Malformed tool input (invalid enum) raises a validation error."""
        bad_input = _valid_analysis_input()
        bad_input["market_outlook_direction"] = "extremely_bullish"  # not a valid enum

        resp = _make_openai_response(bad_input)

        # pydantic's ValidationError derives from ValueError, so this accepts
        # either a raw validation error or an explicit ValueError while still
        # failing on unrelated bugs (AttributeError, TypeError, ...) that a
        # blanket ``Exception`` match would hide.
        with pytest.raises(ValueError):
            await self._analyze(resp)