feat: productionize local service — fix signal pipeline, lower thresholds, add company-name ticker extraction

- Point Ollama to local instance via host.docker.internal, use gemma3 model
- Remove Docker Ollama service (using host's Ollama instead)
- Add company-name-to-ticker mapping (Apple→AAPL, Tesla→TSLA, etc.) for RSS articles
- Lower signal thresholds for faster feedback with paper trading:
  - FinBERT confidence: 0.6→0.4, signal strength: 0.3→0.15
  - News strategy: article_count 2→1, confidence 0.5→0.3, score ±0.3→±0.15
- Fix market data BarSet access bug (BarSet.__contains__ returns False incorrectly)
- Fix market data SIP feed error by switching to IEX feed for free Alpaca accounts
- Fix nginx proxy routing for /api/auth/* to api-gateway /auth/*
- Add seed_sample_data script
- Update tests for new thresholds and alpaca mock modules
This commit is contained in:
Viktor Barzin 2026-02-22 22:17:26 +00:00
parent 67e64fab18
commit d36ae40df1
No known key found for this signature in database
GPG key ID: 0EB088298288D958
18 changed files with 749 additions and 185 deletions

View file

@ -4,6 +4,7 @@ Handles common formats:
- Dollar-prefixed: ``$AAPL``
- Exchange-prefixed: ``NASDAQ:AAPL``, ``NYSE:TSLA``
- Standalone uppercase words that look like tickers (2-5 uppercase letters;
  shorter symbols only when the dollar/exchange forms already captured them)
- Company name mentions: ``Apple``, ``Tesla``, ``Nvidia``, etc.
"""
from __future__ import annotations
@ -119,6 +120,54 @@ _FALSE_POSITIVES: frozenset[str] = frozenset(
}
)
# Lookup table from lowercase company name to ticker symbol.
# Entries are declared per ticker so that aliases of the same company
# ("alphabet" / "google") sit next to each other; the comprehension flattens
# them into name -> ticker pairs, keeping declaration order.
_COMPANY_TO_TICKER: dict[str, str] = {
    alias: symbol
    for symbol, aliases in (
        ("GOOGL", ("alphabet", "google")),
        ("AMZN", ("amazon",)),
        ("AAPL", ("apple",)),
        ("MSFT", ("microsoft",)),
        ("TSLA", ("tesla",)),
        ("NVDA", ("nvidia",)),
        ("META", ("meta platforms", "meta")),
        ("NFLX", ("netflix",)),
        ("AMD", ("advanced micro devices", "amd")),
        ("INTC", ("intel",)),
        ("AVGO", ("broadcom",)),
        ("CRM", ("salesforce",)),
        ("ADBE", ("adobe",)),
        ("PYPL", ("paypal",)),
        ("UBER", ("uber",)),
        ("ABNB", ("airbnb",)),
        ("SPOT", ("spotify",)),
        ("SHOP", ("shopify",)),
        ("SNOW", ("snowflake",)),
        ("PLTR", ("palantir",)),
        ("COIN", ("coinbase",)),
        ("HOOD", ("robinhood",)),
        ("WMT", ("walmart",)),
        ("COST", ("costco",)),
        ("JPM", ("jpmorgan",)),
        ("GS", ("goldman sachs",)),
        ("BAC", ("bank of america",)),
        ("BRK.B", ("berkshire hathaway",)),
        ("JNJ", ("johnson & johnson",)),
        ("PG", ("procter & gamble",)),
        ("KO", ("coca-cola",)),
        ("DIS", ("disney",)),
        ("BA", ("boeing",)),
    )
    for alias in aliases
}

# One case-insensitive alternation over every known company name, anchored on
# word boundaries so a name is not matched inside a longer word.
# Regex alternation takes the first branch that matches, so names are ordered
# longest-first: "meta platforms" must be tried before its prefix "meta".
_COMPANY_PATTERN = re.compile(
    "".join(
        (
            r"\b(",
            "|".join(
                map(re.escape, sorted(_COMPANY_TO_TICKER, key=len, reverse=True))
            ),
            r")\b",
        )
    ),
    re.IGNORECASE,
)
# Pattern 1: $AAPL (dollar-sign prefix)
# Captures the 1-5 uppercase letters that follow a literal ``$``. The trailing
# ``\b`` requires the symbol to end there, so a longer run such as ``$ABCDEF``
# is rejected rather than truncated. Matching is case-sensitive.
_DOLLAR_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")
@ -152,6 +201,13 @@ def extract_tickers(text: str) -> list[str]:
for match in _EXCHANGE_PATTERN.finditer(text):
_add(match.group(1))
# Company name mentions (case-insensitive).
for match in _COMPANY_PATTERN.finditer(text):
company_name = match.group(1).lower()
ticker = _COMPANY_TO_TICKER.get(company_name)
if ticker:
_add(ticker)
# Standalone uppercase words: only include if they look like real tickers
# (not in the false positives list). We restrict to 2-5 chars to reduce
# noise, unless they were already captured by the dollar/exchange patterns.