# trading/services/sentiment_analyzer/analyzers/ollama_analyzer.py

"""Ollama-based sentiment analyzer (LLM fallback).

Used when the FinBERT model's confidence is below the configured threshold.
Sends a structured prompt to a local Ollama instance and parses JSON output.
"""

from __future__ import annotations

import json
import logging
import math

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# System prompt sent as the "system" message of every chat request.
# It asks the model to answer with a single JSON object holding
# "sentiment_score", "confidence" and "entities"; only the first two keys are
# read back by ``OllamaAnalyzer._parse_response``.
_SYSTEM_PROMPT = (
    "You are a financial sentiment analysis assistant. "
    "You will be given a news article title and content. "
    "Analyze the sentiment and respond with ONLY valid JSON in this exact format:\n"
    '{"sentiment_score": <float between -1.0 and 1.0>, '
    '"confidence": <float between 0.0 and 1.0>, '
    '"entities": [<list of mentioned company/ticker names>]}\n'
    "Where sentiment_score: -1.0 = very negative, 0.0 = neutral, 1.0 = very positive.\n"
    "Respond with ONLY the JSON object, no other text."
)


class OllamaAnalyzer:
    """Fallback sentiment analyzer using a local Ollama LLM.

    The Ollama client is created lazily on the first call to
    :meth:`analyze`, so constructing an analyzer does not import ``ollama``.
    """

    def __init__(self, model: str = "mistral", host: str = "http://localhost:11434") -> None:
        """Store the connection settings; no client is created yet.

        Parameters
        ----------
        model:
            Name of the Ollama model passed to the chat call.
        host:
            Base URL of the Ollama instance.
        """
        self.model = model
        self.host = host
        # Populated by ``_get_client`` on first use.
        self._client: object | None = None

    def _get_client(self) -> object:
        """Lazily create the Ollama async client and cache it on the instance."""
        if self._client is None:
            # Imported here so the dependency is only needed when the
            # fallback analyzer is actually used.
            import ollama  # type: ignore[import-untyped]

            self._client = ollama.AsyncClient(host=self.host)
        return self._client

    async def analyze(self, title: str, content: str) -> tuple[float, float]:
        """Analyze sentiment using the Ollama LLM.

        Parameters
        ----------
        title:
            Article headline.
        content:
            Article body text.

        Returns
        -------
        tuple[float, float]
            ``(sentiment_score, confidence)``.  On any parse error or
            communication failure, returns ``(0.0, 0.0)`` as a safe fallback.
        """
        user_prompt = f"Title: {title}\n\nContent: {content}"

        # Deliberately broad: this is the boundary to an external service and
        # the caller expects a neutral result rather than an exception.
        try:
            client = self._get_client()
            response = await client.chat(  # type: ignore[union-attr]
                model=self.model,
                messages=[
                    {"role": "system", "content": _SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
            )
            raw_text: str = response["message"]["content"]  # type: ignore[index]
            return self._parse_response(raw_text)
        except Exception:
            logger.exception("Ollama analysis failed")
            return 0.0, 0.0

    @staticmethod
    def _extract_json_object(raw_text: str) -> dict:
        """Return the first JSON object embedded in *raw_text*.

        Scans for each ``{`` and tries to decode a JSON document starting
        there, so markdown code fences, a prose preamble, or trailing chatter
        around the object are all ignored.

        Raises
        ------
        ValueError
            If no position in *raw_text* decodes to a JSON object.
        """
        decoder = json.JSONDecoder()
        start = raw_text.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(raw_text[start:])
            except json.JSONDecodeError:
                # Not a JSON document at this brace; try the next one.
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = raw_text.find("{", start + 1)
        raise ValueError("no JSON object found in Ollama response")

    @staticmethod
    def _parse_response(raw_text: str) -> tuple[float, float]:
        """Extract sentiment_score and confidence from LLM JSON output.

        Robust against markdown code fences, surrounding prose and
        leading/trailing whitespace.  Out-of-range values are clamped to
        ``[-1.0, 1.0]`` (score) and ``[0.0, 1.0]`` (confidence).

        Returns ``(0.0, 0.0)`` on any parsing failure, including missing
        keys, non-numeric values, NaN, and integers too large for a float.
        """
        try:
            data = OllamaAnalyzer._extract_json_object(raw_text)
            score = float(data["sentiment_score"])
            confidence = float(data["confidence"])
            # NaN cannot be clamped: ``min(1.0, nan)`` yields 1.0, which would
            # silently turn a garbage reply into a maximally positive,
            # fully confident result.
            if math.isnan(score) or math.isnan(confidence):
                raise ValueError("NaN value in Ollama response")
        except (json.JSONDecodeError, KeyError, TypeError, ValueError, OverflowError):
            logger.warning("Failed to parse Ollama response: %s", raw_text[:200])
            return 0.0, 0.0

        # Clamp to valid ranges.
        score = max(-1.0, min(1.0, score))
        confidence = max(0.0, min(1.0, confidence))
        return score, confidence