feat: productionize local service — fix signal pipeline, lower thresholds, add company-name ticker extraction

- Point Ollama to local instance via host.docker.internal, use gemma3 model
- Remove Docker Ollama service (using host's Ollama instead)
- Add company-name-to-ticker mapping (Apple→AAPL, Tesla→TSLA, etc.) for RSS articles
- Lower signal thresholds for faster feedback with paper trading:
  - FinBERT confidence: 0.6→0.4, signal strength: 0.3→0.15
  - News strategy: article_count 2→1, confidence 0.5→0.3, score ±0.3→±0.15
- Fix market data BarSet access bug (BarSet.__contains__ returns False incorrectly)
- Fix market data SIP feed error by switching to IEX feed for free Alpaca accounts
- Fix nginx proxy routing for /api/auth/* to api-gateway /auth/*
- Add seed_sample_data script
- Update tests for new thresholds and alpaca mock modules
2026-02-22 22:17:26 +00:00 · 2026-02-22 22:17:26 +00:00 · d36ae40df1
commit d36ae40df1
parent 67e64fab18
18 changed files with 749 additions and 185 deletions
--- a/services/sentiment_analyzer/ticker_extractor.py
+++ b/services/sentiment_analyzer/ticker_extractor.py
@@ -4,6 +4,7 @@ Handles common formats:
  - Dollar-prefixed: ``$AAPL``
  - Exchange-prefixed: ``NASDAQ:AAPL``, ``NYSE:TSLA``
  - Standalone uppercase words that look like tickers (1-5 uppercase letters)
+  - Company name mentions: ``Apple``, ``Tesla``, ``Nvidia``, etc.
 """

 from __future__ import annotations
@@ -119,6 +120,54 @@ _FALSE_POSITIVES: frozenset[str] = frozenset(
    }
 )

+# Mapping of company names (lowercase) to their ticker symbols.
+# Dict order is irrelevant: _COMPANY_PATTERN sorts names longest-first before matching.
+_COMPANY_TO_TICKER: dict[str, str] = {
+    "alphabet": "GOOGL",
+    "google": "GOOGL",
+    "amazon": "AMZN",
+    "apple": "AAPL",
+    "microsoft": "MSFT",
+    "tesla": "TSLA",
+    "nvidia": "NVDA",
+    "meta platforms": "META",
+    "meta": "META",
+    "netflix": "NFLX",
+    "advanced micro devices": "AMD",
+    "amd": "AMD",
+    "intel": "INTC",
+    "broadcom": "AVGO",
+    "salesforce": "CRM",
+    "adobe": "ADBE",
+    "paypal": "PYPL",
+    "uber": "UBER",
+    "airbnb": "ABNB",
+    "spotify": "SPOT",
+    "shopify": "SHOP",
+    "snowflake": "SNOW",
+    "palantir": "PLTR",
+    "coinbase": "COIN",
+    "robinhood": "HOOD",
+    "walmart": "WMT",
+    "costco": "COST",
+    "jpmorgan": "JPM",
+    "goldman sachs": "GS",
+    "bank of america": "BAC",
+    "berkshire hathaway": "BRK.B",
+    "johnson & johnson": "JNJ",
+    "procter & gamble": "PG",
+    "coca-cola": "KO",
+    "disney": "DIS",
+    "boeing": "BA",
+}
+
+# Build a regex that matches any company name as a whole word (case-insensitive).
+# Sort by length descending so multi-word names match before single-word subsets.
+_COMPANY_PATTERN = re.compile(
+    r"\b(" + "|".join(re.escape(name) for name in sorted(_COMPANY_TO_TICKER, key=len, reverse=True)) + r")\b",
+    re.IGNORECASE,
+)
+
 # Pattern 1: $AAPL (dollar-sign prefix)
 _DOLLAR_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")

@@ -152,6 +201,13 @@ def extract_tickers(text: str) -> list[str]:
    for match in _EXCHANGE_PATTERN.finditer(text):
        _add(match.group(1))

+    # Company name mentions (case-insensitive).
+    for match in _COMPANY_PATTERN.finditer(text):
+        company_name = match.group(1).lower()
+        ticker = _COMPANY_TO_TICKER.get(company_name)
+        if ticker:
+            _add(ticker)
+
    # Standalone uppercase words: only include if they look like real tickers
    # (not in the false positives list).  We restrict to 2-5 chars to reduce
    # noise, unless they were already captured by the dollar/exchange patterns.