feat: productionize local service — fix signal pipeline, lower thresholds, add company-name ticker extraction
- Point Ollama to local instance via host.docker.internal, use gemma3 model
- Remove Docker Ollama service (using host's Ollama instead)
- Add company-name-to-ticker mapping (Apple→AAPL, Tesla→TSLA, etc.) for RSS articles
- Lower signal thresholds for faster feedback with paper trading:
  - FinBERT confidence: 0.6→0.4, signal strength: 0.3→0.15
  - News strategy: article_count 2→1, confidence 0.5→0.3, score ±0.3→±0.15
- Fix market data BarSet access bug (BarSet.__contains__ returns False incorrectly)
- Fix market data SIP feed error by switching to IEX feed for free Alpaca accounts
- Fix nginx proxy routing for /api/auth/* to api-gateway /auth/*
- Add seed_sample_data script
- Update tests for new thresholds and alpaca mock modules
This commit is contained in:
parent
67e64fab18
commit
d36ae40df1
18 changed files with 749 additions and 185 deletions
|
|
@ -4,6 +4,7 @@ Handles common formats:
|
|||
- Dollar-prefixed: ``$AAPL``
|
||||
- Exchange-prefixed: ``NASDAQ:AAPL``, ``NYSE:TSLA``
|
||||
- Standalone uppercase words that look like tickers (1-5 uppercase letters)
|
||||
- Company name mentions: ``Apple``, ``Tesla``, ``Nvidia``, etc.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -119,6 +120,54 @@ _FALSE_POSITIVES: frozenset[str] = frozenset(
|
|||
}
|
||||
)
|
||||
|
||||
# Company-name aliases grouped by the ticker symbol they resolve to.
# Aliases are written in lowercase; the flat lookup table below is derived
# from this grouping, so a new alias only has to be added in one place.
_TICKER_ALIASES: dict[str, tuple[str, ...]] = {
    "GOOGL": ("alphabet", "google"),
    "AMZN": ("amazon",),
    "AAPL": ("apple",),
    "MSFT": ("microsoft",),
    "TSLA": ("tesla",),
    "NVDA": ("nvidia",),
    "META": ("meta platforms", "meta"),
    "NFLX": ("netflix",),
    "AMD": ("advanced micro devices", "amd"),
    "INTC": ("intel",),
    "AVGO": ("broadcom",),
    "CRM": ("salesforce",),
    "ADBE": ("adobe",),
    "PYPL": ("paypal",),
    "UBER": ("uber",),
    "ABNB": ("airbnb",),
    "SPOT": ("spotify",),
    "SHOP": ("shopify",),
    "SNOW": ("snowflake",),
    "PLTR": ("palantir",),
    "COIN": ("coinbase",),
    "HOOD": ("robinhood",),
    "WMT": ("walmart",),
    "COST": ("costco",),
    "JPM": ("jpmorgan",),
    "GS": ("goldman sachs",),
    "BAC": ("bank of america",),
    "BRK.B": ("berkshire hathaway",),
    "JNJ": ("johnson & johnson",),
    "PG": ("procter & gamble",),
    "KO": ("coca-cola",),
    "DIS": ("disney",),
    "BA": ("boeing",),
}

# Flat lookup: lowercase company name -> ticker symbol.
_COMPANY_TO_TICKER: dict[str, str] = {
    alias: symbol
    for symbol, aliases in _TICKER_ALIASES.items()
    for alias in aliases
}

# Case-insensitive, whole-word matcher for every known company name.
# Alternatives are ordered longest-first so that a multi-word name such as
# "meta platforms" wins over its single-word prefix "meta".
_COMPANY_PATTERN = re.compile(
    r"\b({})\b".format(
        "|".join(map(re.escape, sorted(_COMPANY_TO_TICKER, key=len, reverse=True)))
    ),
    re.IGNORECASE,
)

# Pattern 1: dollar-sign prefix, e.g. ``$AAPL`` (1-5 uppercase letters).
_DOLLAR_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")
|
||||
|
||||
|
|
@ -152,6 +201,13 @@ def extract_tickers(text: str) -> list[str]:
|
|||
for match in _EXCHANGE_PATTERN.finditer(text):
|
||||
_add(match.group(1))
|
||||
|
||||
# Company name mentions (case-insensitive).
|
||||
for match in _COMPANY_PATTERN.finditer(text):
|
||||
company_name = match.group(1).lower()
|
||||
ticker = _COMPANY_TO_TICKER.get(company_name)
|
||||
if ticker:
|
||||
_add(ticker)
|
||||
|
||||
# Standalone uppercase words: only include if they look like real tickers
|
||||
# (not in the false positives list). We restrict to 2-5 chars to reduce
|
||||
# noise, unless they were already captured by the dollar/exchange patterns.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue