From 89f01ad9c07d81059a352afe03d100c1f96e872b Mon Sep 17 00:00:00 2001
From: Viktor Barzin <me@viktorbarzin.me>
Date: Fri, 22 May 2026 09:52:55 +0000
Subject: [PATCH] refactor(meet-kevin): switch LLM analyzer to OpenRouter via
 OpenAI SDK

The user's Vault has openrouter_api_key but no direct sk-ant-* Anthropic
key. OpenRouter passes through Claude Sonnet 4.5 (~3% markup over
Anthropic list pricing), and routing through it matches the existing
gpt_mini_endpoint pattern used by recruiter-responder.

- Replace anthropic.AsyncAnthropic with openai.AsyncOpenAI + base_url
- Convert Anthropic tool-use API to OpenAI function-calling
- System prompt unchanged (analyst instructions are model-agnostic)
- Drop cache_control (not in OpenAI API); revisit later if cost matters
- Model slug: anthropic/claude-sonnet-4.5 (OpenRouter's current Claude tier)
- Pricing: $3.10/M input, $15.50/M output (OpenRouter pass-through)
- Config field anthropic_api_key -> openrouter_api_key
- pyproject extras: anthropic>=0.40 -> openai>=1.50

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pyproject.toml                                |   2 +-
 services/meet_kevin_watcher/config.py         |   4 +-
 services/meet_kevin_watcher/llm_analyzer.py   | 252 +++++++++---------
 services/meet_kevin_watcher/main.py           |  17 +-
 .../meet_kevin_watcher/test_llm_analyzer.py   | 185 +++++++------
 5 files changed, 244 insertions(+), 216 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 36be7f3..da170e6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ news = ["feedparser>=6.0", "praw>=7.7", "asyncpraw>=7.7", "httpx>=0.27"]
 sentiment = ["transformers>=4.38", "torch>=2.2", "ollama>=0.1"]
 trading = ["alpaca-py>=0.21", "pytz>=2024.1", "yfinance>=0.2", "httpx>=0.27"]
 backtester = ["numpy>=1.26", "pandas>=2.2"]
-meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "anthropic>=0.40", "httpx>=0.27"]
+meet_kevin = ["yt-dlp>=2025.12", "feedparser>=6.0", "openai>=1.50", "httpx>=0.27"]
 dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=4.1", "ruff>=0.3", "mypy>=1.8", "httpx>=0.27"]
 
 [build-system]
diff --git a/services/meet_kevin_watcher/config.py b/services/meet_kevin_watcher/config.py
index 7d3dbd5..229ca34 100644
--- a/services/meet_kevin_watcher/config.py
+++ b/services/meet_kevin_watcher/config.py
@@ -18,12 +18,12 @@ class MeetKevinWatcherConfig(BaseConfig):
 
     # LLM analysis settings
     meet_kevin_max_llm_retries: int = 3
-    meet_kevin_llm_model: str = "claude-sonnet-4-6"
+    meet_kevin_llm_model: str = "anthropic/claude-sonnet-4.5"
     meet_kevin_prompt_version: str = "v1"
     meet_kevin_daily_cost_cap_usd: float = 5.0
 
     # API credentials
-    anthropic_api_key: str = ""
+    openrouter_api_key: str = ""
 
     # Runtime settings
     meet_kevin_workdir: str = "/tmp/meet_kevin_captions"
diff --git a/services/meet_kevin_watcher/llm_analyzer.py b/services/meet_kevin_watcher/llm_analyzer.py
index 0768912..35008e6 100644
--- a/services/meet_kevin_watcher/llm_analyzer.py
+++ b/services/meet_kevin_watcher/llm_analyzer.py
@@ -1,9 +1,7 @@
-"""Claude LLM analyzer for Meet Kevin video transcripts.
+"""OpenRouter LLM analyzer for Meet Kevin video transcripts.
 
-Calls Claude Sonnet 4.6 with tool-use forcing to extract structured
-MeetKevinAnalysis from a video transcript. Uses prompt caching on the
-system block to reduce cost across videos processed within the same
-5-minute window.
+Calls Claude Sonnet (via OpenRouter) with function-calling forcing to extract
+structured MeetKevinAnalysis from a video transcript.
 
 Public API:
   SYSTEM_PROMPT         — module-level analyst instructions
@@ -12,13 +10,14 @@ Public API:
   LlmAnalyzer           — async class; .analyze() does the API call
 """
 
+import json
 import logging
 from dataclasses import dataclass
 from datetime import datetime
 from decimal import Decimal
 from typing import Any
 
-from anthropic import AsyncAnthropic
+from openai import AsyncOpenAI
 
 from shared.schemas.meet_kevin import MeetKevinAnalysis
 
@@ -26,12 +25,16 @@ logger = logging.getLogger(__name__)
 
 # ---------------------------------------------------------------------------
 # Pricing table  (USD per 1 000 000 tokens: input, output)
+# Sonnet entries reflect OpenRouter pass-through pricing (~3% markup over Anthropic list)
 # ---------------------------------------------------------------------------
 
 _PRICING: dict[str, tuple[Decimal, Decimal]] = {
-    "claude-sonnet-4-6": (Decimal("3"), Decimal("15")),
+    "claude-sonnet-4-6": (Decimal("3.10"), Decimal("15.50")),
     "claude-opus-4-7": (Decimal("15"), Decimal("75")),
     "claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
+    # OpenRouter model slugs
+    "anthropic/claude-sonnet-4.5": (Decimal("3.10"), Decimal("15.50")),
+    "anthropic/claude-sonnet-4.6": (Decimal("3.10"), Decimal("15.50")),
 }
 
 # ---------------------------------------------------------------------------
@@ -138,97 +141,99 @@ Now read the transcript provided in the user message and call `submit_analysis`.
 """.strip()
 
 # ---------------------------------------------------------------------------
-# Tool definition  (JSON Schema mirroring MeetKevinAnalysis)
+# Tool definition  (OpenAI function-calling format)
 # ---------------------------------------------------------------------------
 
-_ANALYSIS_TOOL: dict[str, Any] = {
-    "name": "submit_analysis",
-    "description": (
-        "Submit a structured analysis of a Meet Kevin video transcript. "
-        "Call this exactly once with all fields filled in."
-    ),
-    "input_schema": {
-        "type": "object",
-        "required": [
-            "market_outlook_direction",
-            "market_outlook_reasoning",
-            "macro_themes",
-            "key_risks",
-            "summary",
-            "tickers",
-        ],
-        "properties": {
-            "market_outlook_direction": {
-                "type": "string",
-                "enum": ["bullish", "neutral", "bearish", "mixed"],
-                "description": "Overall market sentiment direction",
-            },
-            "market_outlook_reasoning": {
-                "type": "string",
-                "description": "2-4 sentence explanation of the market outlook direction",
-            },
-            "macro_themes": {
-                "type": "array",
-                "items": {"type": "string"},
-                "description": "2-6 high-level macro economic themes discussed",
-            },
-            "key_risks": {
-                "type": "array",
-                "items": {"type": "string"},
-                "description": "2-5 principal downside risks Kevin mentions",
-            },
-            "summary": {
-                "type": "string",
-                "description": "~200-word plain-English investment thesis summary",
-            },
-            "tickers": {
-                "type": "array",
-                "description": "Per-ticker mentions with action and conviction",
-                "items": {
-                    "type": "object",
-                    "required": [
-                        "symbol",
-                        "action",
-                        "conviction",
-                        "time_horizon",
-                        "rationale_quote",
-                        "video_timestamp_seconds",
-                    ],
-                    "properties": {
-                        "symbol": {
-                            "type": "string",
-                            "description": "Uppercase ticker symbol (1-6 chars)",
-                        },
-                        "action": {
-                            "type": "string",
-                            "enum": ["buy", "sell", "hold", "watch", "avoid"],
-                            "description": "Recommendation action",
-                        },
-                        "conviction": {
-                            "type": "number",
-                            "minimum": 0.0,
-                            "maximum": 1.0,
-                            "description": "Confidence in recommendation (0.0-1.0)",
-                        },
-                        "time_horizon": {
-                            "type": "string",
-                            "enum": [
-                                "intraday",
-                                "days",
-                                "weeks",
-                                "months",
-                                "long_term",
-                                "unspecified",
-                            ],
-                            "description": "Time horizon for the recommendation",
-                        },
-                        "rationale_quote": {
-                            "type": "string",
-                            "description": "Short verbatim or paraphrased quote from video",
-                        },
-                        "video_timestamp_seconds": {
-                            "type": ["integer", "null"],
-                            "description": "Timestamp in seconds for deep-link target",
+_ANALYSIS_TOOL_OPENAI: dict[str, Any] = {
+    "type": "function",
+    "function": {
+        "name": "submit_analysis",
+        "description": (
+            "Submit the structured analysis of one Meet Kevin video. Call this exactly once."
+        ),
+        "parameters": {
+            "type": "object",
+            "required": [
+                "market_outlook_direction",
+                "market_outlook_reasoning",
+                "macro_themes",
+                "key_risks",
+                "summary",
+                "tickers",
+            ],
+            "properties": {
+                "market_outlook_direction": {
+                    "type": "string",
+                    "enum": ["bullish", "neutral", "bearish", "mixed"],
+                    "description": "Overall market sentiment direction",
+                },
+                "market_outlook_reasoning": {
+                    "type": "string",
+                    "description": "2-4 sentence explanation of the market outlook direction",
+                },
+                "macro_themes": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "2-6 high-level macro economic themes discussed",
+                },
+                "key_risks": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "2-5 principal downside risks Kevin mentions",
+                },
+                "summary": {
+                    "type": "string",
+                    "description": "~200-word plain-English investment thesis summary",
+                },
+                "tickers": {
+                    "type": "array",
+                    "description": "Per-ticker mentions with action and conviction",
+                    "items": {
+                        "type": "object",
+                        "required": [
+                            "symbol",
+                            "action",
+                            "conviction",
+                            "time_horizon",
+                            "rationale_quote",
+                            "video_timestamp_seconds",
+                        ],
+                        "properties": {
+                            "symbol": {
+                                "type": "string",
+                                "description": "Uppercase ticker symbol (1-6 chars)",
+                            },
+                            "action": {
+                                "type": "string",
+                                "enum": ["buy", "sell", "hold", "watch", "avoid"],
+                                "description": "Recommendation action",
+                            },
+                            "conviction": {
+                                "type": "number",
+                                "minimum": 0.0,
+                                "maximum": 1.0,
+                                "description": "Confidence in recommendation (0.0-1.0)",
+                            },
+                            "time_horizon": {
+                                "type": "string",
+                                "enum": [
+                                    "intraday",
+                                    "days",
+                                    "weeks",
+                                    "months",
+                                    "long_term",
+                                    "unspecified",
+                                ],
+                                "description": "Time horizon for the recommendation",
+                            },
+                            "rationale_quote": {
+                                "type": "string",
+                                "description": "Short verbatim or paraphrased quote from video",
+                            },
+                            "video_timestamp_seconds": {
+                                "type": ["integer", "null"],
+                                "description": "Timestamp in seconds for deep-link target",
+                            },
                         },
                     },
                 },
@@ -291,15 +296,15 @@ _MAX_SEGMENTS = 1000
 
 
 class LlmAnalyzer:
-    """Calls Claude to extract structured analysis from a video transcript.
+    """Calls Claude (via OpenRouter) to extract structured analysis from a video transcript.
 
     Args:
-        client: Configured AsyncAnthropic client.
-        model: Model identifier (e.g. "claude-sonnet-4-6").
+        client: Configured AsyncOpenAI client pointed at OpenRouter.
+        model: Model identifier (e.g. "anthropic/claude-sonnet-4.5").
         prompt_version: Prompt version string stored in kevin_analyses.
     """
 
-    def __init__(self, client: AsyncAnthropic, model: str, prompt_version: str) -> None:
+    def __init__(self, client: AsyncOpenAI, model: str, prompt_version: str) -> None:
         self._client = client
         self._model = model
         self._prompt_version = prompt_version
@@ -313,7 +318,7 @@ class LlmAnalyzer:
         transcript_text: str,
         transcript_segments: list[dict],
     ) -> LlmCallResult:
-        """Run Claude analysis on a transcript and return a structured result.
+        """Run LLM analysis on a transcript and return a structured result.
 
         Args:
             title: Video title.
@@ -326,8 +331,8 @@ class LlmAnalyzer:
             LlmCallResult with parsed MeetKevinAnalysis and token accounting.
 
         Raises:
-            ValueError: If the response contains no tool_use block.
-            pydantic.ValidationError: If tool_use input fails schema validation.
+            ValueError: If the response contains no tool_calls.
+            pydantic.ValidationError: If function arguments fail schema validation.
         """
         user_msg = self._build_user_message(
             title=title,
@@ -337,42 +342,35 @@ class LlmAnalyzer:
             transcript_segments=transcript_segments,
         )
 
-        response = await self._client.messages.create(
+        response = await self._client.chat.completions.create(
             model=self._model,
             max_tokens=4096,
-            system=[
-                {
-                    "type": "text",
-                    "text": SYSTEM_PROMPT,
-                    "cache_control": {"type": "ephemeral"},
-                }
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": user_msg},
             ],
-            tools=[_ANALYSIS_TOOL],
-            tool_choice={"type": "tool", "name": "submit_analysis"},
-            messages=[{"role": "user", "content": user_msg}],
+            tools=[_ANALYSIS_TOOL_OPENAI],
+            tool_choice={"type": "function", "function": {"name": "submit_analysis"}},
         )
 
-        # Find the first tool_use block
-        tool_block = next(
-            (b for b in response.content if b.type == "tool_use"),
-            None,
-        )
-        if tool_block is None:
+        message = response.choices[0].message
+        if not message.tool_calls:
             raise ValueError(
-                f"Claude response contained no tool_use block "
-                f"(stop_reason={response.stop_reason!r})"
+                "LLM response contained no tool_calls (expected submit_analysis function call)"
             )
 
-        analysis = MeetKevinAnalysis.model_validate(tool_block.input)
+        tool_call = message.tool_calls[0]
+        tool_input = json.loads(tool_call.function.arguments)
+        analysis = MeetKevinAnalysis.model_validate(tool_input)
 
-        prompt_tokens: int = response.usage.input_tokens
-        completion_tokens: int = response.usage.output_tokens
+        prompt_tokens: int = response.usage.prompt_tokens
+        completion_tokens: int = response.usage.completion_tokens
         cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)
 
         raw_response: dict = {
-            "stop_reason": response.stop_reason,
-            "tool_name": tool_block.name,
-            "tool_input": tool_block.input,
+            "finish_reason": response.choices[0].finish_reason,
+            "tool_name": tool_call.function.name,
+            "tool_input": tool_input,
             "usage": {
                 "input_tokens": prompt_tokens,
                 "output_tokens": completion_tokens,
diff --git a/services/meet_kevin_watcher/main.py b/services/meet_kevin_watcher/main.py
index 40f151e..b79ff8e 100644
--- a/services/meet_kevin_watcher/main.py
+++ b/services/meet_kevin_watcher/main.py
@@ -16,7 +16,7 @@ from datetime import timezone
 from decimal import Decimal
 
 import httpx
-from anthropic import AsyncAnthropic
+from openai import AsyncOpenAI
 from sqlalchemy import select
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 
@@ -179,10 +179,17 @@ async def run() -> None:
     # Database
     engine, session_factory = create_db(config)
 
-    # Anthropic client + LLM analyzer
-    anthropic = AsyncAnthropic(api_key=config.anthropic_api_key)
+    # OpenRouter client + LLM analyzer
+    client = AsyncOpenAI(
+        api_key=config.openrouter_api_key,
+        base_url="https://openrouter.ai/api/v1",
+        default_headers={
+            "HTTP-Referer": "https://trading.viktorbarzin.me",
+            "X-Title": "trading-bot meet-kevin",
+        },
+    )
     analyzer = LlmAnalyzer(
-        client=anthropic,
+        client=client,
         model=config.meet_kevin_llm_model,
         prompt_version=config.meet_kevin_prompt_version,
     )
@@ -241,7 +248,7 @@ async def run() -> None:
                 except asyncio.TimeoutError:
                     pass  # Normal timeout — loop again
     finally:
-        await anthropic.close()
+        await client.close()
         await engine.dispose()
         logger.info("meet-kevin-watcher stopped gracefully")
 
diff --git a/tests/services/meet_kevin_watcher/test_llm_analyzer.py b/tests/services/meet_kevin_watcher/test_llm_analyzer.py
index a576aa4..ccb90a2 100644
--- a/tests/services/meet_kevin_watcher/test_llm_analyzer.py
+++ b/tests/services/meet_kevin_watcher/test_llm_analyzer.py
@@ -1,8 +1,9 @@
-"""Tests for the Claude LLM analyzer (Task 7).
+"""Tests for the OpenRouter LLM analyzer (Task 7).
 
 Tests use MagicMock/AsyncMock to avoid real API calls.
 """
 
+import json
 from datetime import datetime, timezone
 from decimal import Decimal
 from unittest.mock import AsyncMock, MagicMock
@@ -27,17 +28,23 @@ from shared.schemas.meet_kevin import (
 # Test helpers
 # ---------------------------------------------------------------------------
 
-def _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800):
-    """Build a minimal mock of an Anthropic messages.create response."""
-    block = MagicMock()
-    block.type = "tool_use"
-    block.name = "submit_analysis"
-    block.input = tool_input
+def _make_openai_response(tool_args: dict, in_tokens: int = 5000, out_tokens: int = 800):
+    """Mock an OpenAI ChatCompletion response with one tool_call."""
+    tool_call = MagicMock()
+    tool_call.function = MagicMock()
+    tool_call.function.name = "submit_analysis"
+    tool_call.function.arguments = json.dumps(tool_args)
+
+    msg = MagicMock()
+    msg.tool_calls = [tool_call]
+
+    choice = MagicMock()
+    choice.message = msg
+    choice.finish_reason = "tool_calls"
 
     resp = MagicMock()
-    resp.content = [block]
-    resp.usage = MagicMock(input_tokens=in_tokens, output_tokens=out_tokens)
-    resp.stop_reason = "tool_use"
+    resp.choices = [choice]
+    resp.usage = MagicMock(prompt_tokens=in_tokens, completion_tokens=out_tokens)
     return resp
 
 
@@ -63,13 +70,15 @@ def _valid_analysis_input() -> dict:
 
 
 def _make_client(response=None):
-    """Return a mocked AsyncAnthropic client with messages.create wired up."""
+    """Return a mocked AsyncOpenAI client with chat.completions.create wired up."""
     mock_create = AsyncMock(return_value=response)
-    mock_messages = MagicMock()
-    mock_messages.create = mock_create
+    mock_completions = MagicMock()
+    mock_completions.create = mock_create
+    mock_chat = MagicMock()
+    mock_chat.completions = mock_completions
 
     client = MagicMock()
-    client.messages = mock_messages
+    client.chat = mock_chat
     return client, mock_create
 
 
@@ -81,11 +90,16 @@ def _make_client(response=None):
 class TestComputeCostUsd:
     """Verify monetary cost calculations using Decimal arithmetic."""
 
-    def test_sonnet_46_pricing(self):
-        """claude-sonnet-4-6: $3/M input + $15/M output."""
-        # 1M input + 1M output = $3 + $15 = $18
+    def test_sonnet_45_openrouter_pricing(self):
+        """anthropic/claude-sonnet-4.5: $3.10/M input + $15.50/M output."""
+        # 1M input + 1M output = $3.10 + $15.50 = $18.60
+        result = compute_cost_usd("anthropic/claude-sonnet-4.5", 1_000_000, 1_000_000)
+        assert result == Decimal("18.6000")
+
+    def test_sonnet_46_legacy_slug(self):
+        """claude-sonnet-4-6 (legacy slug) is also priced at $3.10/$15.50."""
         result = compute_cost_usd("claude-sonnet-4-6", 1_000_000, 1_000_000)
-        assert result == Decimal("18.0000")
+        assert result == Decimal("18.6000")
 
     def test_opus_47_pricing(self):
         """claude-opus-4-7: $15/M input + $75/M output."""
@@ -104,21 +118,21 @@ class TestComputeCostUsd:
 
     def test_zero_tokens(self):
         """Zero tokens produce zero cost."""
-        result = compute_cost_usd("claude-sonnet-4-6", 0, 0)
+        result = compute_cost_usd("anthropic/claude-sonnet-4.5", 0, 0)
         assert result == Decimal("0")
 
     def test_result_is_decimal(self):
         """Return type is always Decimal, not float."""
-        result = compute_cost_usd("claude-sonnet-4-6", 5000, 800)
+        result = compute_cost_usd("anthropic/claude-sonnet-4.5", 5000, 800)
         assert isinstance(result, Decimal)
 
     def test_small_realistic_call(self):
-        """Realistic 10K input + 1K output token call (Sonnet 4.6)."""
-        # input: 10000/1_000_000 * 3 = 0.03000
-        # output: 1000/1_000_000 * 15 = 0.01500
-        # total: 0.04500
-        result = compute_cost_usd("claude-sonnet-4-6", 10_000, 1_000)
-        assert result == Decimal("0.0450")
+        """Realistic 10K input + 1K output token call (Sonnet 4.5 via OpenRouter)."""
+        # input:  10000/1_000_000 * 3.10 = 0.03100
+        # output:  1000/1_000_000 * 15.50 = 0.01550
+        # total: 0.04650
+        result = compute_cost_usd("anthropic/claude-sonnet-4.5", 10_000, 1_000)
+        assert result == Decimal("0.0465")
 
 
 # ---------------------------------------------------------------------------
@@ -164,7 +178,7 @@ class TestLlmCallResult:
         analysis = MeetKevinAnalysis(**_valid_analysis_input())
         result = LlmCallResult(
             analysis=analysis,
-            raw_response={"stop_reason": "tool_use"},
+            raw_response={"finish_reason": "tool_calls"},
             prompt_tokens=5000,
             completion_tokens=800,
             cost_usd=Decimal("0.027"),
@@ -178,13 +192,13 @@ class TestLlmCallResult:
         cost = Decimal("0.027")
         result = LlmCallResult(
             analysis=analysis,
-            raw_response={"stop_reason": "tool_use"},
+            raw_response={"finish_reason": "tool_calls"},
             prompt_tokens=5000,
             completion_tokens=800,
             cost_usd=cost,
         )
         assert result.analysis is analysis
-        assert result.raw_response == {"stop_reason": "tool_use"}
+        assert result.raw_response == {"finish_reason": "tool_calls"}
         assert result.prompt_tokens == 5000
         assert result.completion_tokens == 800
         assert result.cost_usd == cost
@@ -202,10 +216,10 @@ class TestLlmAnalyzerHappyPath:
     async def test_returns_llm_call_result(self):
         """analyze() returns an LlmCallResult with parsed MeetKevinAnalysis."""
         tool_input = _valid_analysis_input()
-        resp = _make_anthropic_response(tool_input, in_tokens=5000, out_tokens=800)
+        resp = _make_openai_response(tool_input, in_tokens=5000, out_tokens=800)
         client, mock_create = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         result = await analyzer.analyze(
             title="Market Update",
             description="Kevin covers the latest market trends.",
@@ -226,10 +240,10 @@ class TestLlmAnalyzerHappyPath:
     async def test_analysis_fields_parsed_correctly(self):
         """Parsed MeetKevinAnalysis has correct field values from tool input."""
         tool_input = _valid_analysis_input()
-        resp = _make_anthropic_response(tool_input)
+        resp = _make_openai_response(tool_input)
         client, _ = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         result = await analyzer.analyze(
             title="Test Video",
             description="Description",
@@ -251,10 +265,10 @@ class TestLlmAnalyzerHappyPath:
     @pytest.mark.asyncio
     async def test_cost_usd_is_positive(self):
         """cost_usd is calculated and positive for a valid token count."""
-        resp = _make_anthropic_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
+        resp = _make_openai_response(_valid_analysis_input(), in_tokens=10_000, out_tokens=1_000)
         client, _ = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         result = await analyzer.analyze(
             title="Test",
             description="",
@@ -267,11 +281,11 @@ class TestLlmAnalyzerHappyPath:
 
     @pytest.mark.asyncio
     async def test_api_called_with_tool_choice_forcing(self):
-        """messages.create is called with tool_choice forcing submit_analysis."""
-        resp = _make_anthropic_response(_valid_analysis_input())
+        """chat.completions.create is called with tool_choice forcing submit_analysis."""
+        resp = _make_openai_response(_valid_analysis_input())
         client, mock_create = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         await analyzer.analyze(
             title="Test",
             description="",
@@ -282,15 +296,15 @@ class TestLlmAnalyzerHappyPath:
 
         mock_create.assert_called_once()
         kwargs = mock_create.call_args.kwargs
-        assert kwargs["tool_choice"] == {"type": "tool", "name": "submit_analysis"}
+        assert kwargs["tool_choice"] == {"type": "function", "function": {"name": "submit_analysis"}}
 
     @pytest.mark.asyncio
-    async def test_api_called_with_cache_control_on_system(self):
-        """System prompt is passed with cache_control: {type: ephemeral}."""
-        resp = _make_anthropic_response(_valid_analysis_input())
+    async def test_api_called_with_system_prompt_in_messages(self):
+        """System prompt is passed as a system role message in the messages list."""
+        resp = _make_openai_response(_valid_analysis_input())
         client, mock_create = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         await analyzer.analyze(
             title="Test",
             description="",
@@ -300,19 +314,18 @@ class TestLlmAnalyzerHappyPath:
         )
 
         kwargs = mock_create.call_args.kwargs
-        system = kwargs["system"]
-        assert isinstance(system, list)
-        assert len(system) >= 1
-        assert system[0]["type"] == "text"
-        assert system[0]["cache_control"] == {"type": "ephemeral"}
+        messages = kwargs["messages"]
+        assert isinstance(messages, list)
+        assert messages[0]["role"] == "system"
+        assert SYSTEM_PROMPT in messages[0]["content"]
 
     @pytest.mark.asyncio
     async def test_api_called_with_correct_model(self):
-        """messages.create is called with the model passed to LlmAnalyzer."""
-        resp = _make_anthropic_response(_valid_analysis_input())
+        """chat.completions.create is called with the model passed to LlmAnalyzer."""
+        resp = _make_openai_response(_valid_analysis_input())
         client, mock_create = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-opus-4-7", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         await analyzer.analyze(
             title="Test",
             description="",
@@ -322,15 +335,15 @@ class TestLlmAnalyzerHappyPath:
         )
 
         kwargs = mock_create.call_args.kwargs
-        assert kwargs["model"] == "claude-opus-4-7"
+        assert kwargs["model"] == "anthropic/claude-sonnet-4.5"
 
     @pytest.mark.asyncio
     async def test_api_called_with_submit_analysis_tool(self):
-        """Tool definition includes name='submit_analysis'."""
-        resp = _make_anthropic_response(_valid_analysis_input())
+        """Tool definition includes function name 'submit_analysis'."""
+        resp = _make_openai_response(_valid_analysis_input())
         client, mock_create = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         await analyzer.analyze(
             title="Test",
             description="",
@@ -341,15 +354,18 @@ class TestLlmAnalyzerHappyPath:
 
         kwargs = mock_create.call_args.kwargs
         tools = kwargs["tools"]
-        assert any(t.get("name") == "submit_analysis" for t in tools)
+        assert any(
+            t.get("type") == "function" and t.get("function", {}).get("name") == "submit_analysis"
+            for t in tools
+        )
 
     @pytest.mark.asyncio
     async def test_raw_response_is_captured(self):
         """raw_response in LlmCallResult holds serializable dict."""
-        resp = _make_anthropic_response(_valid_analysis_input())
+        resp = _make_openai_response(_valid_analysis_input())
         client, _ = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         result = await analyzer.analyze(
             title="Test",
             description="",
@@ -363,7 +379,7 @@ class TestLlmAnalyzerHappyPath:
     @pytest.mark.asyncio
     async def test_transcript_segments_included_in_user_message(self):
         """User message contains timestamped segment lines from transcript_segments."""
-        resp = _make_anthropic_response(_valid_analysis_input())
+        resp = _make_openai_response(_valid_analysis_input())
         client, mock_create = _make_client(resp)
 
         segments = [
@@ -371,7 +387,7 @@ class TestLlmAnalyzerHappyPath:
             {"start": 5.0, "end": 10.0, "text": "Let's talk stocks."},
         ]
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         await analyzer.analyze(
             title="Test",
             description="",
@@ -381,8 +397,8 @@ class TestLlmAnalyzerHappyPath:
         )
 
         kwargs = mock_create.call_args.kwargs
-        user_content = kwargs["messages"][0]["content"]
-        # The user message should contain the segment text
+        # user message is the second entry in messages list
+        user_content = kwargs["messages"][1]["content"]
         assert "Hello world." in user_content
         assert "Let's talk stocks." in user_content
 
@@ -396,22 +412,23 @@ class TestLlmAnalyzerFailurePaths:
     """Failure path tests."""
 
     @pytest.mark.asyncio
-    async def test_no_tool_use_block_raises_value_error(self):
-        """If response has no tool_use block, raises ValueError mentioning tool_use."""
-        # Response with a text block instead of tool_use
-        text_block = MagicMock()
-        text_block.type = "text"
-        text_block.text = "Here is my analysis..."
+    async def test_no_tool_calls_raises_value_error(self):
+        """If response message has no tool_calls, raises ValueError."""
+        msg = MagicMock()
+        msg.tool_calls = None
+
+        choice = MagicMock()
+        choice.message = msg
+        choice.finish_reason = "stop"
 
         resp = MagicMock()
-        resp.content = [text_block]
-        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
-        resp.stop_reason = "end_turn"
+        resp.choices = [choice]
+        resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
 
         client, _ = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
-        with pytest.raises(ValueError, match="tool_use"):
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
+        with pytest.raises(ValueError):
             await analyzer.analyze(
                 title="Test",
                 description="",
@@ -421,16 +438,22 @@ class TestLlmAnalyzerFailurePaths:
             )
 
     @pytest.mark.asyncio
-    async def test_empty_content_raises_value_error(self):
-        """If response content is empty, raises ValueError."""
+    async def test_empty_tool_calls_raises_value_error(self):
+        """If response message has empty tool_calls list, raises ValueError."""
+        msg = MagicMock()
+        msg.tool_calls = []
+
+        choice = MagicMock()
+        choice.message = msg
+        choice.finish_reason = "stop"
+
         resp = MagicMock()
-        resp.content = []
-        resp.usage = MagicMock(input_tokens=5000, output_tokens=800)
-        resp.stop_reason = "tool_use"
+        resp.choices = [choice]
+        resp.usage = MagicMock(prompt_tokens=5000, completion_tokens=800)
 
         client, _ = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         with pytest.raises(ValueError):
             await analyzer.analyze(
                 title="Test",
@@ -446,10 +469,10 @@ class TestLlmAnalyzerFailurePaths:
         bad_input = _valid_analysis_input()
         bad_input["market_outlook_direction"] = "extremely_bullish"  # not a valid enum
 
-        resp = _make_anthropic_response(bad_input)
+        resp = _make_openai_response(bad_input)
         client, _ = _make_client(resp)
 
-        analyzer = LlmAnalyzer(client=client, model="claude-sonnet-4-6", prompt_version="v1")
+        analyzer = LlmAnalyzer(client=client, model="anthropic/claude-sonnet-4.5", prompt_version="v1")
         with pytest.raises(Exception):  # pydantic ValidationError or ValueError
             await analyzer.analyze(
                 title="Test",