102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
"""Ollama-based sentiment analyzer (LLM fallback).
|
|
|
|
Used when the FinBERT model's confidence is below the configured threshold.
|
|
Sends a structured prompt to a local Ollama instance and parses JSON output.
|
|
"""
|
|
|
|
from __future__ import annotations

import json
import logging
import math
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# System prompt sent with every chat request. It is built from four logical
# lines joined by newlines: the task description, the JSON template the model
# must follow, the meaning of the score scale, and a final "JSON only" reminder.
_SYSTEM_PROMPT = "\n".join(
    (
        "You are a financial sentiment analysis assistant. "
        "You will be given a news article title and content. "
        "Analyze the sentiment and respond with ONLY valid JSON in this exact format:",
        '{"sentiment_score": <float between -1.0 and 1.0>, '
        '"confidence": <float between 0.0 and 1.0>, '
        '"entities": [<list of mentioned company/ticker names>]}',
        "Where sentiment_score: -1.0 = very negative, 0.0 = neutral, 1.0 = very positive.",
        "Respond with ONLY the JSON object, no other text.",
    )
)
|
|
|
|
|
|
class OllamaAnalyzer:
    """Fallback sentiment analyzer using a local Ollama LLM.

    The Ollama client is created lazily on the first call that needs it, so
    constructing an analyzer performs no import of ``ollama`` and no I/O.
    """

    def __init__(self, model: str = "mistral", host: str = "http://localhost:11434") -> None:
        """Store the connection settings.

        Parameters
        ----------
        model:
            Name of the Ollama model passed to every chat request.
        host:
            Base URL of the Ollama server handed to the async client.
        """
        self.model = model
        self.host = host
        # Populated by ``_get_client`` on first use.
        self._client: object | None = None

    def _get_client(self) -> object:
        """Lazily create the Ollama async client and cache it on the instance."""
        if self._client is None:
            # Deferred import: ``ollama`` is only imported once a client is needed.
            import ollama  # type: ignore[import-untyped]

            self._client = ollama.AsyncClient(host=self.host)
        return self._client

    async def analyze(self, title: str, content: str) -> tuple[float, float]:
        """Analyze sentiment using the Ollama LLM.

        Parameters
        ----------
        title:
            Article headline.
        content:
            Article body text.

        Returns
        -------
        tuple[float, float]
            ``(sentiment_score, confidence)``. On any parse error or
            communication failure, returns ``(0.0, 0.0)`` as a safe fallback.
        """
        user_prompt = f"Title: {title}\n\nContent: {content}"

        # Deliberately broad: this is a best-effort fallback path, so any
        # failure (import, transport, unexpected response shape) is logged with
        # its traceback and reported as "no signal" instead of propagating.
        try:
            client = self._get_client()
            response = await client.chat(  # type: ignore[union-attr]
                model=self.model,
                messages=[
                    {"role": "system", "content": _SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
            )
            raw_text: str = response["message"]["content"]  # type: ignore[index]
            return self._parse_response(raw_text)
        except Exception:
            logger.exception("Ollama analysis failed")
            return 0.0, 0.0

    @staticmethod
    def _parse_response(raw_text: str) -> tuple[float, float]:
        """Extract sentiment_score and confidence from LLM JSON output.

        Robust against markdown code fences, leading/trailing whitespace and
        prose surrounding the JSON object. Values outside the valid ranges are
        clamped to ``[-1.0, 1.0]`` (score) and ``[0.0, 1.0]`` (confidence).

        Parameters
        ----------
        raw_text:
            The text content returned by the model.

        Returns
        -------
        tuple[float, float]
            ``(sentiment_score, confidence)``, or ``(0.0, 0.0)`` on any parsing
            failure: invalid JSON, missing keys, non-numeric or NaN values, or
            a payload that is not a string.
        """
        try:
            # Strip potential markdown code fences (```json ... ```).
            text = raw_text.strip()
            if text.startswith("```"):
                text = "\n".join(
                    ln for ln in text.split("\n") if not ln.strip().startswith("```")
                ).strip()

            try:
                data = json.loads(text)
            except json.JSONDecodeError:
                # The model may wrap the object in prose or put it on the same
                # line as a fence; fall back to the outermost ``{...}`` span.
                start = raw_text.find("{")
                end = raw_text.rfind("}")
                if start == -1 or end <= start:
                    raise
                data = json.loads(raw_text[start : end + 1])

            score = float(data["sentiment_score"])
            confidence = float(data["confidence"])

            # NaN slips through max()/min() clamping and would come out as
            # (1.0, 1.0), i.e. "very positive, fully confident". Reject it.
            if math.isnan(score) or math.isnan(confidence):
                raise ValueError("NaN value in Ollama response")

            # Clamp to valid ranges.
            score = max(-1.0, min(1.0, score))
            confidence = max(0.0, min(1.0, confidence))

            return score, confidence
        except (json.JSONDecodeError, AttributeError, KeyError, TypeError, ValueError):
            # ``str()`` keeps the log call safe when the payload is not a string.
            logger.warning("Failed to parse Ollama response: %s", str(raw_text)[:200])
            return 0.0, 0.0
|