# trading/tests/services/test_sentiment_analyzer.py
"""Tests for the sentiment analyzer service.
Covers FinBERT analyzer, Ollama analyzer, ticker extraction, and the main
service flow.
"""
from __future__ import annotations
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from services.sentiment_analyzer.analyzers.finbert import FinBERTAnalyzer
from services.sentiment_analyzer.analyzers.ollama_analyzer import OllamaAnalyzer
from services.sentiment_analyzer.config import SentimentAnalyzerConfig
from services.sentiment_analyzer.main import process_article
from services.sentiment_analyzer.ticker_extractor import extract_tickers
from shared.schemas.news import RawArticle
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_raw_article(**overrides) -> RawArticle:
    """Build a RawArticle with sensible defaults; keyword args override any field."""
    fields = {
        "source": "test",
        "url": "https://example.com/article",
        "title": "Test Article About $AAPL",
        "content": "Apple Inc announced strong earnings.",
        "published_at": datetime(2026, 1, 15, tzinfo=timezone.utc),
        "fetched_at": datetime(2026, 1, 15, 0, 5, tzinfo=timezone.utc),
        "content_hash": "abc123",
    }
    # Caller-supplied values win over the defaults.
    return RawArticle(**{**fields, **overrides})
def _make_pipeline_result(label: str, score: float) -> list[list[dict]]:
"""Build a return value matching transformers pipeline(return_all_scores=True)."""
labels = {"positive": score if label == "positive" else 0.0,
"negative": score if label == "negative" else 0.0,
"neutral": score if label == "neutral" else 0.0}
# Distribute remaining probability
remaining = 1.0 - score
other_labels = [l for l in labels if l != label]
for ol in other_labels:
labels[ol] = remaining / len(other_labels)
return [[{"label": l, "score": s} for l, s in labels.items()]]
# ---------------------------------------------------------------------------
# FinBERT Analyzer Tests
# ---------------------------------------------------------------------------
class TestFinBERTAnalyzer:
    """Tests for FinBERTAnalyzer with a mocked transformers pipeline."""

    @staticmethod
    def _analyzer_predicting(label: str, prob: float):
        """Return (analyzer, mock pipeline) where the pipeline yields a canned result."""
        fake_pipeline = MagicMock(return_value=_make_pipeline_result(label, prob))
        analyzer = FinBERTAnalyzer(model_name="test-model")
        analyzer._pipeline = fake_pipeline
        return analyzer, fake_pipeline

    @pytest.mark.asyncio
    async def test_finbert_positive_sentiment(self):
        """Positive article should yield a positive score."""
        analyzer, fake_pipeline = self._analyzer_predicting("positive", 0.9)
        score, confidence = await analyzer.analyze(
            "Apple beats earnings expectations",
            "Apple reported revenue above analyst estimates.",
        )
        assert score > 0.0, f"Expected positive score, got {score}"
        assert confidence == pytest.approx(0.9, abs=0.01)
        fake_pipeline.assert_called_once()

    @pytest.mark.asyncio
    async def test_finbert_negative_sentiment(self):
        """Negative article should yield a negative score."""
        analyzer, _ = self._analyzer_predicting("negative", 0.85)
        score, confidence = await analyzer.analyze(
            "Major bank reports massive losses",
            "The bank lost $2 billion in the quarter.",
        )
        assert score < 0.0, f"Expected negative score, got {score}"
        assert confidence == pytest.approx(0.85, abs=0.01)

    @pytest.mark.asyncio
    async def test_finbert_neutral_sentiment(self):
        """Neutral article should yield a near-zero score."""
        analyzer, _ = self._analyzer_predicting("neutral", 0.8)
        score, confidence = await analyzer.analyze(
            "Company releases quarterly report",
            "The quarterly report was filed with the SEC.",
        )
        # Neutral dominant => score close to zero (neutral maps to 0).
        # The small residual comes from the remaining probability split
        # between positive and negative.
        assert abs(score) < 0.2, f"Expected near-zero score, got {score}"
        assert confidence == pytest.approx(0.8, abs=0.01)
# ---------------------------------------------------------------------------
# Ollama Analyzer Tests
# ---------------------------------------------------------------------------
class TestOllamaAnalyzer:
    """Tests for OllamaAnalyzer with a mocked ollama client."""

    @staticmethod
    def _analyzer_replying(content: str) -> OllamaAnalyzer:
        """Build an analyzer whose mocked client replies with *content* verbatim."""
        fake_client = AsyncMock()
        fake_client.chat.return_value = {"message": {"content": content}}
        analyzer = OllamaAnalyzer(model="test-model")
        analyzer._client = fake_client
        return analyzer

    @pytest.mark.asyncio
    async def test_ollama_successful_analysis(self):
        """Valid JSON response should be parsed correctly."""
        analyzer = self._analyzer_replying(
            '{"sentiment_score": 0.75, "confidence": 0.85, "entities": ["AAPL"]}'
        )
        score, confidence = await analyzer.analyze("Good news for Apple", "Apple stock surges.")
        assert score == pytest.approx(0.75)
        assert confidence == pytest.approx(0.85)

    @pytest.mark.asyncio
    async def test_ollama_parse_error_returns_zero(self):
        """Invalid JSON should return (0.0, 0.0) fallback."""
        analyzer = self._analyzer_replying(
            "I think the sentiment is positive but I'm not sure."
        )
        score, confidence = await analyzer.analyze("Some headline", "Some content")
        assert score == 0.0
        assert confidence == 0.0

    @pytest.mark.asyncio
    async def test_ollama_connection_error_returns_zero(self):
        """Network/connection errors should return (0.0, 0.0) fallback."""
        fake_client = AsyncMock()
        fake_client.chat.side_effect = ConnectionError("Cannot reach Ollama")
        analyzer = OllamaAnalyzer(model="test-model")
        analyzer._client = fake_client
        score, confidence = await analyzer.analyze("Some headline", "Some content")
        assert score == 0.0
        assert confidence == 0.0

    @pytest.mark.asyncio
    async def test_ollama_markdown_code_fence(self):
        """JSON wrapped in markdown code fences should still be parsed."""
        analyzer = self._analyzer_replying(
            '```json\n{"sentiment_score": -0.5, "confidence": 0.7, "entities": []}\n```'
        )
        score, confidence = await analyzer.analyze("Bad news", "Markets tumble.")
        assert score == pytest.approx(-0.5)
        assert confidence == pytest.approx(0.7)
# ---------------------------------------------------------------------------
# Ticker Extraction Tests
# ---------------------------------------------------------------------------
class TestTickerExtraction:
    """Tests for the ticker extraction utility."""

    def test_ticker_extraction_dollar_sign(self):
        """$AAPL should extract AAPL."""
        assert "AAPL" in extract_tickers("Big news for $AAPL today.")

    def test_ticker_extraction_exchange_prefix(self):
        """NASDAQ:TSLA should extract TSLA."""
        assert "TSLA" in extract_tickers("Check out NASDAQ:TSLA performance.")

    def test_ticker_extraction_nyse_prefix(self):
        """NYSE:AAPL should extract AAPL."""
        assert "AAPL" in extract_tickers("NYSE:AAPL is trading higher.")

    def test_ticker_extraction_filters_false_positives(self):
        """Common words like CEO, IPO, ETF, SEC, NYSE should be filtered."""
        found = extract_tickers(
            "The CEO announced a new IPO. The ETF was approved by the SEC on NYSE."
        )
        for acronym in ("CEO", "IPO", "ETF", "SEC", "NYSE"):
            assert acronym not in found

    def test_ticker_extraction_deduplicates(self):
        """Repeated mentions of the same ticker should appear only once."""
        found = extract_tickers("$AAPL rose 5%. $AAPL is now above $200. NASDAQ:AAPL is great.")
        assert found.count("AAPL") == 1

    def test_ticker_extraction_multiple_tickers(self):
        """Multiple different tickers should all be extracted."""
        found = extract_tickers("$AAPL and $MSFT both reported earnings. $GOOG is next.")
        for symbol in ("AAPL", "MSFT", "GOOG"):
            assert symbol in found

    def test_ticker_extraction_empty_text(self):
        """Empty text should return no tickers."""
        assert extract_tickers("") == []

    def test_ticker_extraction_no_tickers(self):
        """Text with no ticker-like patterns should return empty list."""
        found = extract_tickers("The market was flat today with no major movers.")
        # Should be empty — all uppercase words are filtered as false positives or too short.
        assert len(found) == 0
# ---------------------------------------------------------------------------
# Ollama Fallback Routing Test
# ---------------------------------------------------------------------------
class TestFallbackRouting:
    """Test that Ollama is called when FinBERT confidence is below threshold."""

    @staticmethod
    def _fresh_counters() -> dict:
        """One MagicMock per metric the service increments."""
        names = ("articles_scored", "finbert_count", "ollama_count", "inference_latency")
        return {name: MagicMock() for name in names}

    @staticmethod
    def _config() -> SentimentAnalyzerConfig:
        """Config with a 0.6 confidence threshold and metrics disabled."""
        return SentimentAnalyzerConfig(
            finbert_confidence_threshold=0.6,
            otel_metrics_port=0,
        )

    @pytest.mark.asyncio
    async def test_ollama_fallback_on_low_confidence(self):
        """When FinBERT confidence < threshold, Ollama should be called."""
        # FinBERT reports confidence 0.4, below the 0.6 threshold.
        finbert = AsyncMock(spec=FinBERTAnalyzer)
        finbert.analyze = AsyncMock(return_value=(0.1, 0.4))
        # Ollama takes over with a more confident result.
        ollama = AsyncMock(spec=OllamaAnalyzer)
        ollama.analyze = AsyncMock(return_value=(0.8, 0.9))
        publisher = AsyncMock()
        publisher.publish = AsyncMock(return_value=b"1-0")
        counters = self._fresh_counters()
        article = _make_raw_article(title="Test $AAPL Article", content="Apple stock rises.")
        await process_article(article, finbert, ollama, publisher, self._config(), counters)
        # Both analyzers ran and both per-model counters ticked once.
        finbert.analyze.assert_called_once()
        ollama.analyze.assert_called_once()
        counters["finbert_count"].add.assert_called_once_with(1)
        counters["ollama_count"].add.assert_called_once_with(1)

    @pytest.mark.asyncio
    async def test_no_ollama_on_high_confidence(self):
        """When FinBERT confidence >= threshold, Ollama should NOT be called."""
        finbert = AsyncMock(spec=FinBERTAnalyzer)
        finbert.analyze = AsyncMock(return_value=(0.8, 0.9))  # 0.9 clears the 0.6 bar
        ollama = AsyncMock(spec=OllamaAnalyzer)
        ollama.analyze = AsyncMock(return_value=(0.5, 0.7))
        publisher = AsyncMock()
        publisher.publish = AsyncMock(return_value=b"1-0")
        counters = self._fresh_counters()
        article = _make_raw_article(title="Test $AAPL Article", content="Apple stock rises.")
        await process_article(article, finbert, ollama, publisher, self._config(), counters)
        finbert.analyze.assert_called_once()
        ollama.analyze.assert_not_called()
        counters["ollama_count"].add.assert_not_called()
# ---------------------------------------------------------------------------
# Main Flow / Integration Test
# ---------------------------------------------------------------------------
class TestMainFlow:
    """Test the full process_article flow with mocked analyzers and Redis."""

    @staticmethod
    def _fresh_counters() -> dict:
        """One MagicMock per metric the service increments."""
        names = ("articles_scored", "finbert_count", "ollama_count", "inference_latency")
        return {name: MagicMock() for name in names}

    @pytest.mark.asyncio
    async def test_main_flow_publishes_scored_articles(self):
        """process_article should publish a ScoredArticle for each ticker found."""
        finbert = AsyncMock(spec=FinBERTAnalyzer)
        finbert.analyze = AsyncMock(return_value=(0.75, 0.88))
        ollama = AsyncMock(spec=OllamaAnalyzer)
        publisher = AsyncMock()
        publisher.publish = AsyncMock(return_value=b"1-0")
        config = SentimentAnalyzerConfig(
            finbert_confidence_threshold=0.6,
            otel_metrics_port=0,
        )
        counters = self._fresh_counters()
        # Two tickers in the headline -> two published messages expected.
        article = _make_raw_article(
            title="$AAPL and $MSFT report strong earnings",
            content="Both Apple and Microsoft beat estimates.",
        )
        await process_article(article, finbert, ollama, publisher, config, counters)
        assert publisher.publish.call_count == 2
        counters["articles_scored"].add.assert_called_once_with(1)
        # One message per ticker, each carrying the FinBERT score/confidence.
        publish_calls = publisher.publish.call_args_list
        seen_tickers = {c.args[0]["ticker"] for c in publish_calls}
        assert "AAPL" in seen_tickers
        assert "MSFT" in seen_tickers
        for c in publish_calls:
            payload = c.args[0]
            assert payload["sentiment_score"] == pytest.approx(0.75)
            assert payload["confidence"] == pytest.approx(0.88)
            assert payload["model_used"] == "finbert"

    @pytest.mark.asyncio
    async def test_main_flow_no_tickers_no_publish(self):
        """Articles with no tickers should not publish anything."""
        finbert = AsyncMock(spec=FinBERTAnalyzer)
        finbert.analyze = AsyncMock(return_value=(0.5, 0.9))
        ollama = AsyncMock(spec=OllamaAnalyzer)
        publisher = AsyncMock()
        publisher.publish = AsyncMock()
        config = SentimentAnalyzerConfig(
            finbert_confidence_threshold=0.6,
            otel_metrics_port=0,
        )
        counters = self._fresh_counters()
        article = _make_raw_article(
            title="Market is flat today",
            content="Nothing much happening in the market.",
        )
        await process_article(article, finbert, ollama, publisher, config, counters)
        publisher.publish.assert_not_called()
        # The article still counts as scored even though nothing was published.
        counters["articles_scored"].add.assert_called_once_with(1)