feat: sentiment analyzer — FinBERT + Ollama tiered analysis

2026-02-22 15:27:06 +00:00 · 2026-02-22 15:27:06 +00:00 · 6952a829ae
commit 6952a829ae
parent 9f46071502
11 changed files with 976 additions and 1 deletions
--- a/services/sentiment_analyzer/analyzers/ollama_analyzer.py
+++ b/services/sentiment_analyzer/analyzers/ollama_analyzer.py
@ -0,0 +1,102 @@
+"""Ollama-based sentiment analyzer (LLM fallback).
+
+Used when the FinBERT model's confidence is below the configured threshold.
+Sends a structured prompt to a local Ollama instance and parses JSON output.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# System message sent with every chat request.  It is assembled from four
+# logical lines joined by newlines: the task description, the JSON schema the
+# model must emit, the meaning of the score scale, and a final reminder to
+# output nothing but the JSON object.
+_SYSTEM_PROMPT = "\n".join(
+    (
+        # Task description.
+        "You are a financial sentiment analysis assistant. "
+        "You will be given a news article title and content. "
+        "Analyze the sentiment and respond with ONLY valid JSON in this exact format:",
+        # Expected JSON schema.
+        '{"sentiment_score": <float between -1.0 and 1.0>, '
+        '"confidence": <float between 0.0 and 1.0>, '
+        '"entities": [<list of mentioned company/ticker names>]}',
+        # Score scale.
+        "Where sentiment_score: -1.0 = very negative, 0.0 = neutral, 1.0 = very positive.",
+        # Output-format reminder.
+        "Respond with ONLY the JSON object, no other text.",
+    )
+)
+
+
+class OllamaAnalyzer:
+    """Fallback sentiment analyzer using a local Ollama LLM.
+
+    The ``ollama`` package is imported and the async client is created only
+    on first use, so constructing an analyzer performs no I/O.
+    """
+
+    def __init__(self, model: str = "mistral", host: str = "http://localhost:11434") -> None:
+        """Store the connection settings; the client is created on demand.
+
+        Parameters
+        ----------
+        model:
+            Name of the Ollama model passed to every chat request.
+        host:
+            Base URL of the Ollama server.
+        """
+        self.model = model
+        self.host = host
+        # Populated by ``_get_client`` on the first call.
+        self._client: object | None = None
+
+    def _get_client(self) -> object:
+        """Lazily create the Ollama async client and cache it on the instance."""
+        if self._client is None:
+            # Imported here so the module can be loaded without ``ollama``.
+            import ollama  # type: ignore[import-untyped]
+
+            self._client = ollama.AsyncClient(host=self.host)
+        return self._client
+
+    async def analyze(self, title: str, content: str) -> tuple[float, float]:
+        """Analyze sentiment using the Ollama LLM.
+
+        Parameters
+        ----------
+        title:
+            Article headline.
+        content:
+            Article body text.
+
+        Returns
+        -------
+        tuple[float, float]
+            ``(sentiment_score, confidence)``.  On any parse error or
+            communication failure, returns ``(0.0, 0.0)`` as a safe fallback.
+        """
+        user_prompt = f"Title: {title}\n\nContent: {content}"
+
+        try:
+            client = self._get_client()
+            response = await client.chat(  # type: ignore[union-attr]
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": _SYSTEM_PROMPT},
+                    {"role": "user", "content": user_prompt},
+                ],
+            )
+            raw_text: str = response["message"]["content"]  # type: ignore[index]
+            return self._parse_response(raw_text)
+        except Exception:
+            # Deliberate catch-all: this analyzer is a best-effort fallback,
+            # so any failure is logged with its traceback and reported as
+            # "no signal" instead of propagating to the caller.
+            logger.exception("Ollama analysis failed")
+            return 0.0, 0.0
+
+    @staticmethod
+    def _parse_response(raw_text: str) -> tuple[float, float]:
+        """Extract sentiment_score and confidence from LLM JSON output.
+
+        Robust against markdown code fences, leading/trailing whitespace and
+        explanatory prose surrounding the JSON object.  Values outside the
+        valid ranges are clamped; ``NaN`` values are rejected.
+
+        Parameters
+        ----------
+        raw_text:
+            The text content of the model's reply.
+
+        Returns
+        -------
+        tuple[float, float]
+            ``(sentiment_score, confidence)`` with the score clamped to
+            ``[-1.0, 1.0]`` and the confidence to ``[0.0, 1.0]``.  Returns
+            ``(0.0, 0.0)`` on any parsing failure.
+        """
+        import math
+
+        try:
+            # Strip potential markdown code fences.
+            text = raw_text.strip()
+            if text.startswith("```"):
+                # Remove ```json ... ``` wrapper by dropping the fence lines.
+                body = [ln for ln in text.split("\n") if not ln.strip().startswith("```")]
+                text = "\n".join(body).strip()
+
+            try:
+                data = json.loads(text)
+            except json.JSONDecodeError:
+                # Chat models frequently surround the object with prose
+                # ("Sure! {...}").  Retry on the outermost brace-delimited
+                # span; if there is none, re-raise into the outer handler.
+                start, end = text.find("{"), text.rfind("}")
+                if start == -1 or end <= start:
+                    raise
+                data = json.loads(text[start : end + 1])
+
+            score = float(data["sentiment_score"])
+            confidence = float(data["confidence"])
+
+            # ``json.loads`` accepts the bare token NaN, and min()/max() would
+            # silently turn a NaN into the upper bound (1.0).  Treat it as a
+            # parse failure instead of reporting maximal sentiment/confidence.
+            if math.isnan(score) or math.isnan(confidence):
+                raise ValueError("NaN value in Ollama response")
+
+            # Clamp to valid ranges.
+            score = max(-1.0, min(1.0, score))
+            confidence = max(0.0, min(1.0, confidence))
+
+            return score, confidence
+        except (json.JSONDecodeError, KeyError, TypeError, ValueError):
+            logger.warning("Failed to parse Ollama response: %s", raw_text[:200])
+            return 0.0, 0.0