"""OpenRouter LLM analyzer for Meet Kevin video transcripts.

Calls Claude Sonnet (via OpenRouter) with a forced function call to extract a
structured MeetKevinAnalysis from a video transcript.

Public API:
  SYSTEM_PROMPT         — module-level analyst instructions
  compute_cost_usd()    — Decimal-precise cost from token counts
  LlmCallResult         — frozen dataclass returned by analyze()
  LlmAnalyzer           — async class; .analyze() does the API call
"""

import json
import logging
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from typing import Any

from openai import AsyncOpenAI

from shared.schemas.meet_kevin import MeetKevinAnalysis

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Pricing table  (USD per 1 000 000 tokens: input, output)
# OpenRouter pass-through pricing (~3% markup over Anthropic list)
# ---------------------------------------------------------------------------

# Maps model identifier -> (input rate, output rate), both in USD per 1,000,000
# tokens. compute_cost_usd() looks rates up here; a model missing from this
# table is costed at zero there, so add a row whenever a new model is used.
# NOTE(review): the Sonnet rows carry fractional rates (3.10 / 15.50) while the
# Opus and Haiku rows are round numbers — presumably those two were entered
# without the ~3% markup; confirm against current OpenRouter pricing.
_PRICING: dict[str, tuple[Decimal, Decimal]] = {
    # Un-prefixed model identifiers
    "claude-sonnet-4-6": (Decimal("3.10"), Decimal("15.50")),
    "claude-opus-4-7": (Decimal("15"), Decimal("75")),
    "claude-haiku-4-5-20251001": (Decimal("1"), Decimal("5")),
    # OpenRouter model slugs
    "anthropic/claude-sonnet-4.5": (Decimal("3.10"), Decimal("15.50")),
    "anthropic/claude-sonnet-4.6": (Decimal("3.10"), Decimal("15.50")),
}

# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------

# Analyst instructions sent as the system message on every LlmAnalyzer.analyze()
# call. The field names and enum values listed here mirror the JSON schema in
# _ANALYSIS_TOOL_OPENAI, and the `[<N>s] <text>` timestamp format matches the
# lines produced by LlmAnalyzer._build_user_message — keep all three in sync
# when editing. The trailing .strip() removes the leading/trailing newlines
# introduced by the triple-quoted layout.
SYSTEM_PROMPT = """
You are a professional financial analyst specialising in retail investor sentiment.
Your task is to read the full transcript of a Meet Kevin (Kevin Paffrath) YouTube
video and extract a structured investment analysis from it.

## Your mission

Read the transcript carefully and produce a single, precise call to the
`submit_analysis` tool. Do **not** respond with prose — your entire output must be
that one tool call with all required fields filled in correctly.

## What to extract

### Market outlook
Identify the overall market direction Kevin is expressing: bullish, bearish, neutral,
or mixed. Write a concise `market_outlook_reasoning` (2–4 sentences) that explains
*why* you assigned that direction, grounded in specific statements from the video.

### Macro themes
List the 2–6 highest-level economic or policy themes Kevin discusses (e.g.
"Federal Reserve rate path", "AI capex cycle", "commercial real estate stress",
"dollar strength", "energy transition"). These should be phrase-length labels, not
full sentences.

### Key risks
List the 2–5 principal downside risks Kevin flags. Again, short phrase labels, not
paragraphs. Only include risks Kevin explicitly names or clearly implies — do not
invent risks he did not discuss.

### Summary
Write a ~200-word plain-English summary of the video's investment thesis. Focus on
actionable takeaways and any specific catalysts Kevin mentions. Avoid filler phrases
like "In this video Kevin discusses…" — start directly with the insight.

### Per-ticker mentions (tickers field)
Extract every stock, ETF, or crypto ticker that Kevin makes a substantive statement
about. For each one, fill in the following:

- **symbol** — The uppercase ticker symbol (e.g. "NVDA", "SPY", "BTC"). If Kevin
  mentions the company name but not the ticker, infer the ticker from the name (e.g.
  "Nvidia" → "NVDA"). Max 6 characters. Only include tickers you are confident about.

- **action** — The clearest action signal you can infer from what Kevin says. Use
  exactly one of: `buy`, `sell`, `hold`, `watch`, `avoid`. If Kevin expresses
  interest but no clear directional view, use `watch`. If he says he is exiting or
  would not touch it, use `sell` or `avoid` respectively. Do not default to `hold`
  just because you are unsure — skip the ticker instead.

- **conviction** — A float between 0.0 and 1.0 representing how confident Kevin
  sounds. Use 0.8–1.0 for "I'm buying this aggressively / this is my top pick",
  0.5–0.7 for a clear directional view with some hedging, 0.2–0.4 for a tentative
  or heavily-caveated take. A ticker Kevin mentions only in passing (< 20 words of
  commentary) should be **skipped entirely** rather than assigned low conviction.

- **time_horizon** — Pick the closest match from: `intraday`, `days`, `weeks`,
  `months`, `long_term`, `unspecified`. If Kevin does not say, use `unspecified`.

- **rationale_quote** — A short verbatim or lightly paraphrased quote (20–80 words)
  from the transcript that best justifies the action you assigned. Include enough
  context to be meaningful on its own.

- **video_timestamp_seconds** — If the transcript includes segment timestamps (lines
  formatted as `[<N>s] <text>`), set this to the integer second where Kevin first
  makes the substantive statement about this ticker. If no timestamps are available,
  set to null.

## Rules for ticker inclusion

1. **Skip tickers mentioned only in passing.** Kevin often references tickers as
   examples or comparisons without making any recommendation. If he says fewer than
   ~20 words about a ticker with no clear directional signal, omit it from `tickers`.

2. **Do not duplicate tickers.** If Kevin mentions the same ticker multiple times,
   merge the signals into a single entry that represents his overall view from the
   video. Use the timestamp of the *first* substantive mention.

3. **Symbols only, no company names.** The `symbol` field must be a ticker, not a
   company name. "Nvidia" is wrong; "NVDA" is correct.

4. **Conviction scores are comparative.** Calibrate them relative to each other
   within the video — Kevin's "top conviction" pick in a video might be 0.85, while
   a hedged mention is 0.45.

## Quality checklist (review before calling submit_analysis)

- [ ] `market_outlook_direction` is one of: bullish, neutral, bearish, mixed
- [ ] `macro_themes` has 2–6 items, each a concise phrase
- [ ] `key_risks` has 2–5 items, each a concise phrase
- [ ] `summary` is approximately 200 words
- [ ] Every ticker in `tickers` has a clear actionable signal (no "I'm not sure")
- [ ] Tickers mentioned only in passing are omitted
- [ ] `conviction` values are floats in [0.0, 1.0]
- [ ] `time_horizon` is one of the six allowed values
- [ ] `rationale_quote` is grounded in something Kevin actually said
- [ ] You are calling `submit_analysis` exactly once with all required fields

Now read the transcript provided in the user message and call `submit_analysis`.
""".strip()

# ---------------------------------------------------------------------------
# Tool definition  (OpenAI function-calling format)
# ---------------------------------------------------------------------------

# Tool (function) definition handed to the chat-completions call as
# `tools=[_ANALYSIS_TOOL_OPENAI]`. LlmAnalyzer.analyze() forces the model to
# call it via `tool_choice`, then parses the returned arguments with json.loads
# and validates them with MeetKevinAnalysis.model_validate. The "parameters"
# value is a JSON Schema object describing those arguments.
_ANALYSIS_TOOL_OPENAI: dict[str, Any] = {
    "type": "function",
    "function": {
        # Must match the name used in analyze()'s tool_choice and in SYSTEM_PROMPT.
        "name": "submit_analysis",
        "description": (
            "Submit the structured analysis of one Meet Kevin video. Call this exactly once."
        ),
        "parameters": {
            "type": "object",
            # Every top-level field is mandatory.
            "required": [
                "market_outlook_direction",
                "market_outlook_reasoning",
                "macro_themes",
                "key_risks",
                "summary",
                "tickers",
            ],
            "properties": {
                "market_outlook_direction": {
                    "type": "string",
                    "enum": ["bullish", "neutral", "bearish", "mixed"],
                    "description": "Overall market sentiment direction",
                },
                "market_outlook_reasoning": {
                    "type": "string",
                    "description": "2-4 sentence explanation of the market outlook direction",
                },
                # Item counts (2-6, 2-5) are stated only in the descriptions;
                # the schema itself sets no minItems/maxItems.
                "macro_themes": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "2-6 high-level macro economic themes discussed",
                },
                "key_risks": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "2-5 principal downside risks Kevin mentions",
                },
                "summary": {
                    "type": "string",
                    "description": "~200-word plain-English investment thesis summary",
                },
                "tickers": {
                    "type": "array",
                    "description": "Per-ticker mentions with action and conviction",
                    "items": {
                        "type": "object",
                        # All per-ticker fields are required;
                        # video_timestamp_seconds is required but nullable.
                        "required": [
                            "symbol",
                            "action",
                            "conviction",
                            "time_horizon",
                            "rationale_quote",
                            "video_timestamp_seconds",
                        ],
                        "properties": {
                            # The 1-6 character limit is stated in the description
                            # only; no maxLength is set in the schema.
                            "symbol": {
                                "type": "string",
                                "description": "Uppercase ticker symbol (1-6 chars)",
                            },
                            "action": {
                                "type": "string",
                                "enum": ["buy", "sell", "hold", "watch", "avoid"],
                                "description": "Recommendation action",
                            },
                            "conviction": {
                                "type": "number",
                                "minimum": 0.0,
                                "maximum": 1.0,
                                "description": "Confidence in recommendation (0.0-1.0)",
                            },
                            "time_horizon": {
                                "type": "string",
                                "enum": [
                                    "intraday",
                                    "days",
                                    "weeks",
                                    "months",
                                    "long_term",
                                    "unspecified",
                                ],
                                "description": "Time horizon for the recommendation",
                            },
                            "rationale_quote": {
                                "type": "string",
                                "description": "Short verbatim or paraphrased quote from video",
                            },
                            # Nullable: SYSTEM_PROMPT asks for null when the
                            # transcript carries no segment timestamps.
                            "video_timestamp_seconds": {
                                "type": ["integer", "null"],
                                "description": "Timestamp in seconds for deep-link target",
                            },
                        },
                    },
                },
            },
        },
    },
}

# ---------------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------------


def compute_cost_usd(model: str, input_tokens: int, output_tokens: int) -> Decimal:
    """Price one LLM call in USD from its token counts.

    Rates are read from the module-level ``_PRICING`` table, which stores
    ``(input, output)`` prices per one million tokens for each model.

    Args:
        model: Model identifier; expected to be a key of ``_PRICING``.
        input_tokens: Number of input/prompt tokens consumed.
        output_tokens: Number of output/completion tokens generated.

    Returns:
        Total cost as a Decimal quantized to four decimal places. A model that
        is missing from ``_PRICING`` is logged as a warning and costed at
        ``Decimal("0")``.
    """
    try:
        rates = _PRICING[model]
    except KeyError:
        logger.warning("compute_cost_usd: unknown model %r — returning zero cost", model)
        return Decimal("0")

    input_rate, output_rate = rates
    tokens_per_million = Decimal("1000000")

    # Scale each token count down to "millions of tokens" before applying the
    # per-million rate, then sum both sides of the bill.
    input_cost = Decimal(input_tokens) / tokens_per_million * input_rate
    output_cost = Decimal(output_tokens) / tokens_per_million * output_rate
    total = input_cost + output_cost
    return total.quantize(Decimal("0.0001"))

# ---------------------------------------------------------------------------
# Result dataclass
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class LlmCallResult:
    """Immutable result of one LLM analyze() call.

    Built by LlmAnalyzer.analyze() once the tool-call arguments have been
    validated. ``frozen=True`` only blocks attribute reassignment; the
    ``raw_response`` dict itself remains mutable.
    """

    # Validated structured analysis parsed from the tool-call arguments.
    analysis: MeetKevinAnalysis
    # Plain-dict snapshot of the response: finish_reason, tool_name, tool_input
    # and a usage sub-dict with input_tokens / output_tokens.
    raw_response: dict[str, Any]
    # Prompt (input) token count taken from the API response's usage block.
    prompt_tokens: int
    # Completion (output) token count taken from the API response's usage block.
    completion_tokens: int
    # Cost of the call as returned by compute_cost_usd().
    cost_usd: Decimal


# ---------------------------------------------------------------------------
# Analyzer class
# ---------------------------------------------------------------------------

# Upper bound on how many timestamped transcript segments are written into the
# user message; LlmAnalyzer._build_user_message slices the segment list to this
# length, so any segments past it never reach the model.
_MAX_SEGMENTS = 1000


class LlmAnalyzer:
    """Calls Claude (via OpenRouter) to extract structured analysis from a video transcript.

    The model is forced to answer with a single ``submit_analysis`` function
    call; the JSON arguments of that call are validated into a
    ``MeetKevinAnalysis``.

    Args:
        client: Configured AsyncOpenAI client pointed at OpenRouter.
        model: Model identifier (e.g. "anthropic/claude-sonnet-4.5").
        prompt_version: Prompt version string stored in kevin_analyses.
    """

    def __init__(self, client: AsyncOpenAI, model: str, prompt_version: str) -> None:
        self._client = client
        self._model = model
        self._prompt_version = prompt_version

    async def analyze(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict[str, Any]],
    ) -> LlmCallResult:
        """Run LLM analysis on a transcript and return a structured result.

        Args:
            title: Video title.
            description: Video description (may be empty).
            published_at: UTC publication timestamp.
            transcript_text: Full concatenated transcript text.
            transcript_segments: List of {start, end, text} dicts.

        Returns:
            LlmCallResult with parsed MeetKevinAnalysis and token accounting.
            If the response carries no usage block, token counts are reported
            as 0 (and a warning is logged).

        Raises:
            ValueError: If the response contains no choices or no tool_calls.
            json.JSONDecodeError: If the function arguments are not valid JSON
                (a ValueError subclass); the message includes the finish_reason.
            pydantic.ValidationError: If function arguments fail schema validation.
        """
        user_msg = self._build_user_message(
            title=title,
            description=description,
            published_at=published_at,
            transcript_text=transcript_text,
            transcript_segments=transcript_segments,
        )

        response = await self._client.chat.completions.create(
            model=self._model,
            max_tokens=4096,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            tools=[_ANALYSIS_TOOL_OPENAI],
            # Force the model to answer with the submit_analysis call, not prose.
            tool_choice={"type": "function", "function": {"name": "submit_analysis"}},
        )

        # A response with no choices would otherwise surface as an opaque
        # IndexError/TypeError; report it as the documented ValueError instead.
        if not response.choices:
            raise ValueError(
                "LLM response contained no choices (expected submit_analysis function call)"
            )

        choice = response.choices[0]
        message = choice.message
        if not message.tool_calls:
            raise ValueError(
                "LLM response contained no tool_calls (expected submit_analysis function call)"
            )

        tool_call = message.tool_calls[0]
        raw_arguments = tool_call.function.arguments or ""
        try:
            tool_input = json.loads(raw_arguments)
        except json.JSONDecodeError as err:
            # Typically happens when generation stops at max_tokens mid-JSON.
            # Re-raise the same exception type with the finish_reason attached
            # so the failure can be diagnosed from the message alone.
            raise json.JSONDecodeError(
                "submit_analysis arguments are not valid JSON "
                f"(finish_reason={choice.finish_reason!r}): {err.msg}",
                err.doc,
                err.pos,
            ) from err
        analysis = MeetKevinAnalysis.model_validate(tool_input)

        # `usage` is optional on the response object; do not discard a valid
        # analysis just because token accounting is unavailable.
        usage = response.usage
        if usage is None:
            logger.warning(
                "analyze: response for model %r carried no usage block — "
                "recording zero tokens and zero cost",
                self._model,
            )
            prompt_tokens = 0
            completion_tokens = 0
        else:
            prompt_tokens = usage.prompt_tokens
            completion_tokens = usage.completion_tokens
        cost_usd = compute_cost_usd(self._model, prompt_tokens, completion_tokens)

        raw_response: dict[str, Any] = {
            "finish_reason": choice.finish_reason,
            "tool_name": tool_call.function.name,
            "tool_input": tool_input,
            "usage": {
                "input_tokens": prompt_tokens,
                "output_tokens": completion_tokens,
            },
        }

        return LlmCallResult(
            analysis=analysis,
            raw_response=raw_response,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cost_usd=cost_usd,
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_user_message(
        self,
        *,
        title: str,
        description: str,
        published_at: datetime,
        transcript_text: str,
        transcript_segments: list[dict[str, Any]],
    ) -> str:
        """Build the user-turn message for the API call.

        The message is a newline-joined block: title, publication time, an
        optional description, a blank line, then the transcript. Timestamped
        segments are preferred (one ``[<N>s] <text>`` line each, capped at
        ``_MAX_SEGMENTS``); otherwise the plain transcript text is used, and a
        placeholder line is emitted when neither is available.

        Args:
            title: Video title.
            description: Video description; omitted from the message when empty.
            published_at: Publication timestamp. Timezone-aware values are
                converted to UTC; naive values are formatted as given.
            transcript_text: Full transcript text, used when no segments exist.
            transcript_segments: List of {start, end, text} dicts.

        Returns:
            The assembled user message.
        """
        from datetime import timezone

        # The label below says "UTC", so normalise aware datetimes first;
        # naive datetimes have no offset to convert from and are left as-is.
        if published_at.utcoffset() is not None:
            published_at = published_at.astimezone(timezone.utc)

        parts: list[str] = [
            f"Title: {title}",
            f"Published: {published_at.strftime('%Y-%m-%d %H:%M UTC')}",
        ]
        if description:
            parts.append(f"Description: {description}")

        parts.append("")  # blank line before transcript

        if transcript_segments:
            # Prefer timestamped segments (up to _MAX_SEGMENTS)
            total_segments = len(transcript_segments)
            if total_segments > _MAX_SEGMENTS:
                # Make the truncation visible instead of silently dropping the tail.
                logger.warning(
                    "_build_user_message: transcript has %d segments; "
                    "only the first %d are sent to the LLM",
                    total_segments,
                    _MAX_SEGMENTS,
                )
            parts.append("Transcript (with timestamps):")
            for seg in transcript_segments[:_MAX_SEGMENTS]:
                # `or` also covers keys that are present but None.
                start_seconds = int(seg.get("start") or 0)
                text = (seg.get("text") or "").strip()
                parts.append(f"[{start_seconds}s] {text}")
        elif transcript_text:
            parts.append("Transcript:")
            parts.append(transcript_text)
        else:
            parts.append("Transcript: (no transcript available)")

        return "\n".join(parts)