feat: productionize local service — fix signal pipeline, lower thresholds, add company-name ticker extraction
- Point Ollama to local instance via host.docker.internal, use gemma3 model - Remove Docker Ollama service (using host's Ollama instead) - Add company-name-to-ticker mapping (Apple→AAPL, Tesla→TSLA, etc.) for RSS articles - Lower signal thresholds for faster feedback with paper trading: - FinBERT confidence: 0.6→0.4, signal strength: 0.3→0.15 - News strategy: article_count 2→1, confidence 0.5→0.3, score ±0.3→±0.15 - Fix market data BarSet access bug (BarSet.__contains__ returns False incorrectly) - Fix market data SIP feed error by switching to IEX feed for free Alpaca accounts - Fix nginx proxy routing for /api/auth/* to api-gateway /auth/* - Add seed_sample_data script - Update tests for new thresholds and alpaca mock modules
This commit is contained in:
parent
67e64fab18
commit
d36ae40df1
18 changed files with 749 additions and 185 deletions
398
scripts/seed_sample_data.py
Normal file
398
scripts/seed_sample_data.py
Normal file
|
|
@ -0,0 +1,398 @@
|
|||
"""Seed the database with ~30 days of realistic sample data.
|
||||
|
||||
Populates portfolio snapshots, trades, signals, positions, news articles,
|
||||
sentiments, strategy metrics, weight history, and trade outcomes.
|
||||
|
||||
Usage:
|
||||
TRADING_DATABASE_URL=postgresql+asyncpg://trading:trading@localhost:5432/trading \
|
||||
python -m scripts.seed_sample_data
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import random
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from shared.config import BaseConfig
|
||||
from shared.db import create_db
|
||||
from shared.models.learning import TradeOutcome
|
||||
from shared.models.news import Article, ArticleSentiment
|
||||
from shared.models.timeseries import PortfolioSnapshot, StrategyMetric
|
||||
from shared.models.trading import (
|
||||
Position,
|
||||
Signal,
|
||||
SignalDirection,
|
||||
Strategy,
|
||||
StrategyWeightHistory,
|
||||
Trade,
|
||||
TradeSide,
|
||||
TradeStatus,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Universe of tickers every seeded record (trades, positions, news) draws from.
TICKERS = ["AAPL", "TSLA", "NVDA", "MSFT", "GOOGL"]
# Strategy rows that seed() ensures exist before attaching signals/trades.
STRATEGY_NAMES = ["momentum", "mean_reversion", "news_driven"]

# Realistic price ranges for tickers (approximate)
# Maps ticker -> (low, high); _random_price samples uniformly in this range.
TICKER_PRICES = {
    "AAPL": (170.0, 195.0),
    "TSLA": (220.0, 280.0),
    "NVDA": (700.0, 900.0),
    "MSFT": (380.0, 430.0),
    "GOOGL": (140.0, 170.0),
}

# (ticker, headline) pairs seeded as Article rows. The headline wording matters:
# seed() derives each article's sentiment score from positive/negative keywords
# found in the headline text.
NEWS_HEADLINES = [
    ("AAPL", "Apple Reports Record Q4 Revenue Driven by iPhone Sales"),
    ("AAPL", "Apple Vision Pro Sees Slow Adoption Rates"),
    ("AAPL", "Apple Expands AI Features Across Product Line"),
    ("TSLA", "Tesla Deliveries Beat Expectations in Q3"),
    ("TSLA", "Tesla Cuts Prices Amid Growing EV Competition"),
    ("TSLA", "Tesla's Robotaxi Event Draws Mixed Reviews"),
    ("NVDA", "NVIDIA Reports Blowout Earnings on AI Chip Demand"),
    ("NVDA", "NVIDIA Blackwell GPUs Face Supply Constraints"),
    ("NVDA", "Data Center Revenue Drives NVIDIA to New Highs"),
    ("MSFT", "Microsoft Azure Growth Accelerates on AI Workloads"),
    ("MSFT", "Microsoft Copilot Adoption Grows Among Enterprise Clients"),
    ("MSFT", "Microsoft Invests $10B in AI Infrastructure"),
    ("GOOGL", "Google Search Revenue Beats Expectations"),
    ("GOOGL", "Alphabet Faces Antitrust Ruling on Search Monopoly"),
    ("GOOGL", "Google Cloud Turns Profitable for Third Consecutive Quarter"),
    ("AAPL", "Apple Supply Chain Diversifies Beyond China"),
    ("TSLA", "Tesla Semi Truck Enters Mass Production"),
    ("NVDA", "NVIDIA Partners with Healthcare Companies for AI Diagnostics"),
    ("MSFT", "Microsoft Teams Surpasses 300 Million Monthly Users"),
    ("GOOGL", "YouTube Ad Revenue Surges 15% Year-Over-Year"),
    ("AAPL", "Apple Services Revenue Hits All-Time High"),
    ("TSLA", "Tesla Energy Storage Deployments Double Year-Over-Year"),
    ("NVDA", "NVIDIA Stock Included in Dow Jones Industrial Average"),
    ("MSFT", "Microsoft Gaming Division Posts Strong Results"),
    ("GOOGL", "Google DeepMind Achieves Breakthrough in Protein Folding"),
    ("AAPL", "Apple Announces Stock Buyback Program"),
    ("TSLA", "Analysts Divided on Tesla Valuation After Rally"),
    ("NVDA", "NVIDIA Announces Next-Gen GPU Architecture"),
    ("MSFT", "Microsoft 365 Price Increase Draws Customer Pushback"),
    ("GOOGL", "Waymo Expands Autonomous Ride Service to New Cities"),
]
|
||||
|
||||
|
||||
def _random_price(ticker: str) -> float:
    """Return a price for *ticker* drawn uniformly from its configured
    (low, high) range in TICKER_PRICES, rounded to 2 decimal places."""
    low, high = TICKER_PRICES[ticker]
    price = random.uniform(low, high)
    return round(price, 2)
|
||||
|
||||
|
||||
def _content_hash(text: str) -> str:
|
||||
return hashlib.sha256(text.encode()).hexdigest()
|
||||
|
||||
|
||||
async def seed(database_url: str | None = None) -> None:
    """Seed the database with ~30 days of reproducible sample trading data.

    Creates (if missing) the strategies in STRATEGY_NAMES, then inserts
    portfolio snapshots, signals, trades, trade outcomes, open positions,
    news articles with sentiments, per-strategy daily metrics, and weight
    history records. Idempotent: if any Trade row already exists the seed
    is skipped entirely. Uses random.seed(42) so repeated runs on an empty
    database produce identical data.

    Args:
        database_url: Optional override for the configured database URL.
    """
    config = BaseConfig()
    if database_url:
        config.database_url = database_url

    engine, session_factory = create_db(config)

    async with session_factory() as session:
        # ---------------------------------------------------------------
        # 1. Ensure strategies exist (reuse from seed_strategies)
        # ---------------------------------------------------------------
        result = await session.execute(select(Strategy))
        existing = {s.name: s for s in result.scalars().all()}

        strategies: dict[str, Strategy] = {}
        for name in STRATEGY_NAMES:
            if name in existing:
                strategies[name] = existing[name]
                logger.info("Strategy '%s' already exists", name)
            else:
                s = Strategy(
                    name=name,
                    description=f"Auto-seeded {name} strategy",
                    current_weight=0.333,
                    active=True,
                )
                session.add(s)
                strategies[name] = s
                logger.info("Created strategy '%s'", name)
        # Flush so newly created strategies get primary keys for FK use below.
        await session.flush()

        # ---------------------------------------------------------------
        # 2. Check if data already seeded (idempotency)
        # ---------------------------------------------------------------
        # Fetch at most one Trade primary key instead of materializing a full
        # ORM row — existence is all we need here.
        existing_trade_id = (
            await session.execute(select(Trade.id).limit(1))
        ).scalars().first()
        if existing_trade_id is not None:
            logger.info("Sample data already exists, skipping seed")
            await engine.dispose()
            return

        now = datetime.now(timezone.utc)
        random.seed(42)  # reproducible data

        # ---------------------------------------------------------------
        # 3. Portfolio snapshots — 30 days of equity curve
        # ---------------------------------------------------------------
        equity = 100_000.0
        snapshots = []
        for day_offset in range(30, 0, -1):
            ts = now - timedelta(days=day_offset)
            daily_change = random.gauss(0.001, 0.008)  # ~0.1% mean, 0.8% std
            equity *= 1 + daily_change
            positions_value = equity * random.uniform(0.3, 0.7)
            cash = equity - positions_value
            daily_pnl = equity * daily_change

            snapshots.append(
                PortfolioSnapshot(
                    timestamp=ts,
                    total_value=round(equity, 2),
                    cash=round(cash, 2),
                    positions_value=round(positions_value, 2),
                    daily_pnl=round(daily_pnl, 2),
                )
            )
        session.add_all(snapshots)
        logger.info("Added %d portfolio snapshots", len(snapshots))

        # ---------------------------------------------------------------
        # 4. Signals + Trades — ~50 trades spread across 30 days
        # ---------------------------------------------------------------
        strategy_list = list(strategies.values())
        all_trades: list[Trade] = []
        all_signals: list[Signal] = []

        for _ in range(50):
            day_offset = random.randint(1, 30)
            # Random time-of-day offset keeps timestamps off midnight.
            ts = now - timedelta(
                days=day_offset,
                hours=random.randint(9, 15),
                minutes=random.randint(0, 59),
            )
            ticker = random.choice(TICKERS)
            strat = random.choice(strategy_list)
            price = _random_price(ticker)
            side = random.choice([TradeSide.BUY, TradeSide.SELL])
            direction = (
                SignalDirection.LONG if side == TradeSide.BUY else SignalDirection.SHORT
            )
            strength = round(random.uniform(0.4, 0.95), 3)
            qty = round(random.uniform(5, 50), 0)

            # P&L: 60% of trades are profitable; magnitude scales with price.
            is_profitable = random.random() < 0.6
            pnl = round(
                random.uniform(20, 800) * (1 if is_profitable else -1)
                * (price / 200),
                2,
            )

            signal = Signal(
                ticker=ticker,
                direction=direction,
                strength=strength,
                strategy_sources={strat.name: strength},
                sentiment_score=round(random.uniform(-0.5, 0.8), 3),
                acted_on=True,
                strategy_id=strat.id,
                created_at=ts,
                updated_at=ts,
            )
            session.add(signal)
            # Flush per signal so signal.id is available for the Trade FK.
            await session.flush()

            trade = Trade(
                ticker=ticker,
                side=side,
                qty=qty,
                price=price,
                status=TradeStatus.FILLED,
                pnl=pnl,
                strategy_id=strat.id,
                signal_id=signal.id,
                created_at=ts,
                updated_at=ts,
            )
            session.add(trade)
            all_trades.append(trade)
            all_signals.append(signal)

        await session.flush()
        logger.info("Added %d signals and %d trades", len(all_signals), len(all_trades))

        # ---------------------------------------------------------------
        # 5. Trade outcomes — for all closed trades
        # ---------------------------------------------------------------
        outcomes = []
        for trade in all_trades:
            hold_hours = random.randint(1, 72)
            roi_pct = round((trade.pnl or 0) / (trade.price * trade.qty) * 100, 2)
            outcome = TradeOutcome(
                trade_id=trade.id,
                hold_duration=timedelta(hours=hold_hours),
                realized_pnl=trade.pnl or 0.0,
                roi_pct=roi_pct,
                was_profitable=(trade.pnl or 0) > 0,
            )
            outcomes.append(outcome)
        session.add_all(outcomes)
        logger.info("Added %d trade outcomes", len(outcomes))

        # ---------------------------------------------------------------
        # 6. Open positions — 4 current positions
        # ---------------------------------------------------------------
        open_tickers = random.sample(TICKERS, 4)
        positions = []
        for ticker in open_tickers:
            price = _random_price(ticker)
            qty = round(random.uniform(10, 100), 0)
            # Unrealized P&L ~ N(0, 3% of notional).
            unrealized = round(random.gauss(0, price * qty * 0.03), 2)
            positions.append(
                Position(
                    ticker=ticker,
                    qty=qty,
                    avg_entry=price,
                    unrealized_pnl=unrealized,
                    stop_loss=round(price * 0.95, 2),
                    take_profit=round(price * 1.10, 2),
                )
            )
        session.add_all(positions)
        logger.info("Added %d open positions", len(positions))

        # ---------------------------------------------------------------
        # 7. News articles + sentiments
        # ---------------------------------------------------------------
        # Keyword sets are loop-invariant; build them once, not per headline.
        positive_words = {"record", "beat", "surge", "high", "growth", "strong", "profit", "expand", "breakthrough", "buyback"}
        negative_words = {"slow", "cut", "face", "divided", "pushback", "mixed", "antitrust"}

        articles = []
        sentiments = []
        for idx, (ticker, headline) in enumerate(NEWS_HEADLINES):
            day_offset = random.randint(1, 30)
            ts = now - timedelta(
                days=day_offset,
                hours=random.randint(6, 20),
            )
            url = f"https://finance.example.com/article/{idx + 1}"
            article = Article(
                source="RSS",
                url=url,
                title=headline,
                published_at=ts,
                fetched_at=ts + timedelta(minutes=random.randint(1, 30)),
                content_hash=_content_hash(f"{headline}-{idx}"),
                created_at=ts,
                updated_at=ts,
            )
            session.add(article)
            # Flush per article so article.id is available for the sentiment FK.
            await session.flush()

            # Sentiment score correlated with headline tone: +0.3 per positive
            # keyword, -0.3 per negative keyword, plus small gaussian noise,
            # clamped to [-1, 1].
            headline_lower = headline.lower()
            pos_count = sum(1 for w in positive_words if w in headline_lower)
            neg_count = sum(1 for w in negative_words if w in headline_lower)
            base_score = 0.3 * pos_count - 0.3 * neg_count
            score = max(-1.0, min(1.0, base_score + random.gauss(0, 0.1)))

            sentiment = ArticleSentiment(
                article_id=article.id,
                ticker=ticker,
                score=round(score, 3),
                confidence=round(random.uniform(0.6, 0.95), 3),
                model_used="finbert",
                created_at=ts,
                updated_at=ts,
            )
            sentiments.append(sentiment)
            articles.append(article)

        session.add_all(sentiments)
        logger.info(
            "Added %d articles and %d sentiments", len(articles), len(sentiments)
        )

        # ---------------------------------------------------------------
        # 8. Strategy metrics — daily metrics per strategy for 30 days
        # ---------------------------------------------------------------
        metrics = []
        for strat in strategy_list:
            cum_pnl = 0.0
            trade_count = 0
            for day_offset in range(30, 0, -1):
                ts = now - timedelta(days=day_offset)
                daily_trades = random.randint(0, 3)
                trade_count += daily_trades
                daily_pnl = round(random.gauss(50, 200), 2)
                cum_pnl += daily_pnl
                win_rate = round(random.uniform(0.35, 0.75), 4)
                sharpe = round(random.gauss(1.2, 0.5), 2)

                metrics.append(
                    StrategyMetric(
                        timestamp=ts,
                        strategy_id=strat.id,
                        win_rate=win_rate,
                        total_pnl=round(cum_pnl, 2),
                        trade_count=trade_count,
                        sharpe_ratio=sharpe,
                    )
                )
        session.add_all(metrics)
        logger.info("Added %d strategy metric records", len(metrics))

        # ---------------------------------------------------------------
        # 9. Strategy weight history — a few adjustment records
        # ---------------------------------------------------------------
        weight_records = []
        reasons = [
            "Periodic performance review — increased weight due to positive Sharpe",
            "Reduced weight after string of losses",
            "Rebalanced weights to equal distribution",
            "Increased weight — strong win rate last 7 days",
            "Decreased weight — high drawdown detected",
        ]
        for strat in strategy_list:
            weight = 0.333
            # 2-4 adjustments per strategy, spaced roughly a week apart.
            for adj_idx in range(random.randint(2, 4)):
                day_offset = 30 - adj_idx * 7
                if day_offset < 1:
                    break
                ts = now - timedelta(days=day_offset)
                old_weight = weight
                # Random walk clamped to [0.1, 0.6].
                weight = round(max(0.1, min(0.6, weight + random.gauss(0, 0.05))), 3)
                weight_records.append(
                    StrategyWeightHistory(
                        strategy_id=strat.id,
                        old_weight=old_weight,
                        new_weight=weight,
                        reason=random.choice(reasons),
                        created_at=ts,
                        updated_at=ts,
                    )
                )
        session.add_all(weight_records)
        logger.info("Added %d weight history records", len(weight_records))

        # ---------------------------------------------------------------
        # Commit all data
        # ---------------------------------------------------------------
        await session.commit()
        logger.info("All sample data committed successfully")

    await engine.dispose()
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: configure logging, then run the async seeder."""
    log_format = "%(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
    asyncio.run(seed())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue