feat: sentiment analyzer — FinBERT + Ollama tiered analysis
This commit is contained in:
parent
9f46071502
commit
6952a829ae
11 changed files with 976 additions and 1 deletions
102
services/sentiment_analyzer/analyzers/ollama_analyzer.py
Normal file
102
services/sentiment_analyzer/analyzers/ollama_analyzer.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
"""Ollama-based sentiment analyzer (LLM fallback).
|
||||
|
||||
Used when the FinBERT model's confidence is below the configured threshold.
|
||||
Sends a structured prompt to a local Ollama instance and parses JSON output.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# System message sent with every chat request.  It pins the reply to a single
# JSON object with ``sentiment_score``, ``confidence`` and ``entities`` keys.
# The prompt is assembled as four newline-separated lines.
_SYSTEM_PROMPT = "\n".join(
    (
        # Line 1: role and task description.
        "You are a financial sentiment analysis assistant. "
        "You will be given a news article title and content. "
        "Analyze the sentiment and respond with ONLY valid JSON in this exact format:",
        # Line 2: the JSON template the model must fill in.
        '{"sentiment_score": <float between -1.0 and 1.0>, '
        '"confidence": <float between 0.0 and 1.0>, '
        '"entities": [<list of mentioned company/ticker names>]}',
        # Line 3: how to read the score scale.
        "Where sentiment_score: -1.0 = very negative, 0.0 = neutral, 1.0 = very positive.",
        # Line 4: final reminder to emit nothing but the JSON object.
        "Respond with ONLY the JSON object, no other text.",
    )
)
|
||||
|
||||
|
||||
class OllamaAnalyzer:
    """Fallback sentiment analyzer using a local Ollama LLM.

    The analyzer sends the article to an Ollama chat model together with
    ``_SYSTEM_PROMPT`` and extracts ``sentiment_score`` and ``confidence``
    from the JSON reply.  The prompt also asks the model for ``entities``,
    but that field is not read or returned by this class.
    """

    def __init__(self, model: str = "mistral", host: str = "http://localhost:11434") -> None:
        """Store connection settings; no client is created yet.

        Parameters
        ----------
        model:
            Name of the Ollama model passed to the chat call.
        host:
            Base URL of the Ollama instance.
        """
        self.model = model
        self.host = host
        # Created on first use by ``_get_client`` so that constructing the
        # analyzer does not import the ``ollama`` package.
        self._client: object | None = None

    def _get_client(self) -> object:
        """Lazily create the Ollama async client and cache it on the instance."""
        if self._client is None:
            # Imported here rather than at module level so the dependency is
            # only needed once an analysis is actually requested.
            import ollama  # type: ignore[import-untyped]

            self._client = ollama.AsyncClient(host=self.host)
        return self._client

    async def analyze(self, title: str, content: str) -> tuple[float, float]:
        """Analyze sentiment using the Ollama LLM.

        Parameters
        ----------
        title:
            Article headline.
        content:
            Article body text.

        Returns
        -------
        tuple[float, float]
            ``(sentiment_score, confidence)``.  On any parse error or
            communication failure, returns ``(0.0, 0.0)`` as a safe fallback.
        """
        user_prompt = f"Title: {title}\n\nContent: {content}"

        # Deliberately broad: this is a best-effort fallback path, so any
        # failure (import, connection, unexpected response shape) is logged
        # with its traceback and mapped to the neutral result.
        try:
            client = self._get_client()
            response = await client.chat(  # type: ignore[union-attr]
                model=self.model,
                messages=[
                    {"role": "system", "content": _SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
            )
            raw_text: str = response["message"]["content"]  # type: ignore[index]
            return self._parse_response(raw_text)
        except Exception:
            logger.exception("Ollama analysis failed")
            return 0.0, 0.0

    @staticmethod
    def _parse_response(raw_text: str) -> tuple[float, float]:
        """Extract sentiment_score and confidence from LLM JSON output.

        Robust against markdown code fences, leading/trailing whitespace and
        prose surrounding the JSON object.  The score is clamped to
        ``[-1.0, 1.0]`` and the confidence to ``[0.0, 1.0]``.

        Returns ``(0.0, 0.0)`` on any parsing failure, including a non-string
        input, missing keys, non-numeric values, NaN values and integers too
        large to convert to ``float``.
        """
        # Function-scope import: ``math`` is not among this module's
        # top-level imports.
        import math

        if not isinstance(raw_text, str):
            # e.g. a ``None`` message content; nothing to parse.
            logger.warning("Failed to parse Ollama response: %r", raw_text)
            return 0.0, 0.0

        try:
            text = raw_text.strip()
            if text.startswith("```"):
                # Remove the ```json ... ``` wrapper lines, keep what is inside.
                text = "\n".join(
                    ln for ln in text.split("\n") if not ln.strip().startswith("```")
                ).strip()

            try:
                data = json.loads(text)
            except json.JSONDecodeError:
                # Models frequently add a sentence before or after the JSON.
                # Decode the first object that starts at a "{" and ignore
                # whatever follows it.
                start = text.find("{")
                if start == -1:
                    raise
                data, _ = json.JSONDecoder().raw_decode(text, start)

            score = float(data["sentiment_score"])
            confidence = float(data["confidence"])

            # ``json`` accepts the literal NaN, and max/min clamping would
            # silently turn it into 1.0; treat it as unparseable instead.
            if math.isnan(score) or math.isnan(confidence):
                raise ValueError("NaN value in Ollama response")

            # Clamp to valid ranges.
            score = max(-1.0, min(1.0, score))
            confidence = max(0.0, min(1.0, confidence))

            return score, confidence
        except (json.JSONDecodeError, KeyError, TypeError, ValueError, OverflowError):
            logger.warning("Failed to parse Ollama response: %s", raw_text[:200])
            return 0.0, 0.0
|
||||
Loading…
Add table
Add a link
Reference in a new issue