feat: productionize local service — fix signal pipeline, lower thresholds, add company-name ticker extraction
- Point Ollama to local instance via host.docker.internal, use gemma3 model - Remove Docker Ollama service (using host's Ollama instead) - Add company-name-to-ticker mapping (Apple→AAPL, Tesla→TSLA, etc.) for RSS articles - Lower signal thresholds for faster feedback with paper trading: - FinBERT confidence: 0.6→0.4, signal strength: 0.3→0.15 - News strategy: article_count 2→1, confidence 0.5→0.3, score ±0.3→±0.15 - Fix market data BarSet access bug (BarSet.__contains__ returns False incorrectly) - Fix market data SIP feed error by switching to IEX feed for free Alpaca accounts - Fix nginx proxy routing for /api/auth/* to api-gateway /auth/* - Add seed_sample_data script - Update tests for new thresholds and alpaca mock modules
This commit is contained in:
parent
67e64fab18
commit
d36ae40df1
18 changed files with 749 additions and 185 deletions
398
scripts/seed_sample_data.py
Normal file
398
scripts/seed_sample_data.py
Normal file
|
|
@ -0,0 +1,398 @@
|
|||
"""Seed the database with ~30 days of realistic sample data.
|
||||
|
||||
Populates portfolio snapshots, trades, signals, positions, news articles,
|
||||
sentiments, strategy metrics, weight history, and trade outcomes.
|
||||
|
||||
Usage:
|
||||
TRADING_DATABASE_URL=postgresql+asyncpg://trading:trading@localhost:5432/trading \
|
||||
python -m scripts.seed_sample_data
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import random
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from shared.config import BaseConfig
|
||||
from shared.db import create_db
|
||||
from shared.models.learning import TradeOutcome
|
||||
from shared.models.news import Article, ArticleSentiment
|
||||
from shared.models.timeseries import PortfolioSnapshot, StrategyMetric
|
||||
from shared.models.trading import (
|
||||
Position,
|
||||
Signal,
|
||||
SignalDirection,
|
||||
Strategy,
|
||||
StrategyWeightHistory,
|
||||
Trade,
|
||||
TradeSide,
|
||||
TradeStatus,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Universe of tickers every seeded record (trades, positions, news) draws from.
TICKERS = ["AAPL", "TSLA", "NVDA", "MSFT", "GOOGL"]
# Strategy rows that seed() ensures exist before attaching signals/trades.
STRATEGY_NAMES = ["momentum", "mean_reversion", "news_driven"]

# Realistic price ranges for tickers (approximate)
# Maps ticker -> (low, high); _random_price samples uniformly in this range.
TICKER_PRICES = {
    "AAPL": (170.0, 195.0),
    "TSLA": (220.0, 280.0),
    "NVDA": (700.0, 900.0),
    "MSFT": (380.0, 430.0),
    "GOOGL": (140.0, 170.0),
}

# (ticker, headline) pairs seeded as Article rows. The headline wording matters:
# seed() derives each article's sentiment score from positive/negative keywords
# found in the headline text.
NEWS_HEADLINES = [
    ("AAPL", "Apple Reports Record Q4 Revenue Driven by iPhone Sales"),
    ("AAPL", "Apple Vision Pro Sees Slow Adoption Rates"),
    ("AAPL", "Apple Expands AI Features Across Product Line"),
    ("TSLA", "Tesla Deliveries Beat Expectations in Q3"),
    ("TSLA", "Tesla Cuts Prices Amid Growing EV Competition"),
    ("TSLA", "Tesla's Robotaxi Event Draws Mixed Reviews"),
    ("NVDA", "NVIDIA Reports Blowout Earnings on AI Chip Demand"),
    ("NVDA", "NVIDIA Blackwell GPUs Face Supply Constraints"),
    ("NVDA", "Data Center Revenue Drives NVIDIA to New Highs"),
    ("MSFT", "Microsoft Azure Growth Accelerates on AI Workloads"),
    ("MSFT", "Microsoft Copilot Adoption Grows Among Enterprise Clients"),
    ("MSFT", "Microsoft Invests $10B in AI Infrastructure"),
    ("GOOGL", "Google Search Revenue Beats Expectations"),
    ("GOOGL", "Alphabet Faces Antitrust Ruling on Search Monopoly"),
    ("GOOGL", "Google Cloud Turns Profitable for Third Consecutive Quarter"),
    ("AAPL", "Apple Supply Chain Diversifies Beyond China"),
    ("TSLA", "Tesla Semi Truck Enters Mass Production"),
    ("NVDA", "NVIDIA Partners with Healthcare Companies for AI Diagnostics"),
    ("MSFT", "Microsoft Teams Surpasses 300 Million Monthly Users"),
    ("GOOGL", "YouTube Ad Revenue Surges 15% Year-Over-Year"),
    ("AAPL", "Apple Services Revenue Hits All-Time High"),
    ("TSLA", "Tesla Energy Storage Deployments Double Year-Over-Year"),
    ("NVDA", "NVIDIA Stock Included in Dow Jones Industrial Average"),
    ("MSFT", "Microsoft Gaming Division Posts Strong Results"),
    ("GOOGL", "Google DeepMind Achieves Breakthrough in Protein Folding"),
    ("AAPL", "Apple Announces Stock Buyback Program"),
    ("TSLA", "Analysts Divided on Tesla Valuation After Rally"),
    ("NVDA", "NVIDIA Announces Next-Gen GPU Architecture"),
    ("MSFT", "Microsoft 365 Price Increase Draws Customer Pushback"),
    ("GOOGL", "Waymo Expands Autonomous Ride Service to New Cities"),
]
|
||||
|
||||
|
||||
def _random_price(ticker: str) -> float:
    """Return a price for *ticker* drawn uniformly from its configured
    (low, high) range in TICKER_PRICES, rounded to 2 decimal places."""
    low, high = TICKER_PRICES[ticker]
    price = random.uniform(low, high)
    return round(price, 2)
|
||||
|
||||
|
||||
def _content_hash(text: str) -> str:
|
||||
return hashlib.sha256(text.encode()).hexdigest()
|
||||
|
||||
|
||||
async def seed(database_url: str | None = None) -> None:
    """Seed the database with ~30 days of reproducible sample trading data.

    Creates (if missing) the strategies in STRATEGY_NAMES, then inserts
    portfolio snapshots, signals, trades, trade outcomes, open positions,
    news articles with sentiments, per-strategy daily metrics, and weight
    history records. Idempotent: if any Trade row already exists the seed
    is skipped entirely. Uses random.seed(42) so repeated runs on an empty
    database produce identical data.

    Args:
        database_url: Optional override for the configured database URL.
    """
    config = BaseConfig()
    if database_url:
        config.database_url = database_url

    engine, session_factory = create_db(config)

    async with session_factory() as session:
        # ---------------------------------------------------------------
        # 1. Ensure strategies exist (reuse from seed_strategies)
        # ---------------------------------------------------------------
        result = await session.execute(select(Strategy))
        existing = {s.name: s for s in result.scalars().all()}

        strategies: dict[str, Strategy] = {}
        for name in STRATEGY_NAMES:
            if name in existing:
                strategies[name] = existing[name]
                logger.info("Strategy '%s' already exists", name)
            else:
                s = Strategy(
                    name=name,
                    description=f"Auto-seeded {name} strategy",
                    current_weight=0.333,
                    active=True,
                )
                session.add(s)
                strategies[name] = s
                logger.info("Created strategy '%s'", name)
        # Flush so newly created strategies get primary keys for FK use below.
        await session.flush()

        # ---------------------------------------------------------------
        # 2. Check if data already seeded (idempotency)
        # ---------------------------------------------------------------
        # Fetch at most one Trade primary key instead of materializing a full
        # ORM row — existence is all we need here.
        existing_trade_id = (
            await session.execute(select(Trade.id).limit(1))
        ).scalars().first()
        if existing_trade_id is not None:
            logger.info("Sample data already exists, skipping seed")
            await engine.dispose()
            return

        now = datetime.now(timezone.utc)
        random.seed(42)  # reproducible data

        # ---------------------------------------------------------------
        # 3. Portfolio snapshots — 30 days of equity curve
        # ---------------------------------------------------------------
        equity = 100_000.0
        snapshots = []
        for day_offset in range(30, 0, -1):
            ts = now - timedelta(days=day_offset)
            daily_change = random.gauss(0.001, 0.008)  # ~0.1% mean, 0.8% std
            equity *= 1 + daily_change
            positions_value = equity * random.uniform(0.3, 0.7)
            cash = equity - positions_value
            daily_pnl = equity * daily_change

            snapshots.append(
                PortfolioSnapshot(
                    timestamp=ts,
                    total_value=round(equity, 2),
                    cash=round(cash, 2),
                    positions_value=round(positions_value, 2),
                    daily_pnl=round(daily_pnl, 2),
                )
            )
        session.add_all(snapshots)
        logger.info("Added %d portfolio snapshots", len(snapshots))

        # ---------------------------------------------------------------
        # 4. Signals + Trades — ~50 trades spread across 30 days
        # ---------------------------------------------------------------
        strategy_list = list(strategies.values())
        all_trades: list[Trade] = []
        all_signals: list[Signal] = []

        for _ in range(50):
            day_offset = random.randint(1, 30)
            # Random time-of-day offset keeps timestamps off midnight.
            ts = now - timedelta(
                days=day_offset,
                hours=random.randint(9, 15),
                minutes=random.randint(0, 59),
            )
            ticker = random.choice(TICKERS)
            strat = random.choice(strategy_list)
            price = _random_price(ticker)
            side = random.choice([TradeSide.BUY, TradeSide.SELL])
            direction = (
                SignalDirection.LONG if side == TradeSide.BUY else SignalDirection.SHORT
            )
            strength = round(random.uniform(0.4, 0.95), 3)
            qty = round(random.uniform(5, 50), 0)

            # P&L: 60% of trades are profitable; magnitude scales with price.
            is_profitable = random.random() < 0.6
            pnl = round(
                random.uniform(20, 800) * (1 if is_profitable else -1)
                * (price / 200),
                2,
            )

            signal = Signal(
                ticker=ticker,
                direction=direction,
                strength=strength,
                strategy_sources={strat.name: strength},
                sentiment_score=round(random.uniform(-0.5, 0.8), 3),
                acted_on=True,
                strategy_id=strat.id,
                created_at=ts,
                updated_at=ts,
            )
            session.add(signal)
            # Flush per signal so signal.id is available for the Trade FK.
            await session.flush()

            trade = Trade(
                ticker=ticker,
                side=side,
                qty=qty,
                price=price,
                status=TradeStatus.FILLED,
                pnl=pnl,
                strategy_id=strat.id,
                signal_id=signal.id,
                created_at=ts,
                updated_at=ts,
            )
            session.add(trade)
            all_trades.append(trade)
            all_signals.append(signal)

        await session.flush()
        logger.info("Added %d signals and %d trades", len(all_signals), len(all_trades))

        # ---------------------------------------------------------------
        # 5. Trade outcomes — for all closed trades
        # ---------------------------------------------------------------
        outcomes = []
        for trade in all_trades:
            hold_hours = random.randint(1, 72)
            roi_pct = round((trade.pnl or 0) / (trade.price * trade.qty) * 100, 2)
            outcome = TradeOutcome(
                trade_id=trade.id,
                hold_duration=timedelta(hours=hold_hours),
                realized_pnl=trade.pnl or 0.0,
                roi_pct=roi_pct,
                was_profitable=(trade.pnl or 0) > 0,
            )
            outcomes.append(outcome)
        session.add_all(outcomes)
        logger.info("Added %d trade outcomes", len(outcomes))

        # ---------------------------------------------------------------
        # 6. Open positions — 4 current positions
        # ---------------------------------------------------------------
        open_tickers = random.sample(TICKERS, 4)
        positions = []
        for ticker in open_tickers:
            price = _random_price(ticker)
            qty = round(random.uniform(10, 100), 0)
            # Unrealized P&L ~ N(0, 3% of notional).
            unrealized = round(random.gauss(0, price * qty * 0.03), 2)
            positions.append(
                Position(
                    ticker=ticker,
                    qty=qty,
                    avg_entry=price,
                    unrealized_pnl=unrealized,
                    stop_loss=round(price * 0.95, 2),
                    take_profit=round(price * 1.10, 2),
                )
            )
        session.add_all(positions)
        logger.info("Added %d open positions", len(positions))

        # ---------------------------------------------------------------
        # 7. News articles + sentiments
        # ---------------------------------------------------------------
        # Keyword sets are loop-invariant; build them once, not per headline.
        positive_words = {"record", "beat", "surge", "high", "growth", "strong", "profit", "expand", "breakthrough", "buyback"}
        negative_words = {"slow", "cut", "face", "divided", "pushback", "mixed", "antitrust"}

        articles = []
        sentiments = []
        for idx, (ticker, headline) in enumerate(NEWS_HEADLINES):
            day_offset = random.randint(1, 30)
            ts = now - timedelta(
                days=day_offset,
                hours=random.randint(6, 20),
            )
            url = f"https://finance.example.com/article/{idx + 1}"
            article = Article(
                source="RSS",
                url=url,
                title=headline,
                published_at=ts,
                fetched_at=ts + timedelta(minutes=random.randint(1, 30)),
                content_hash=_content_hash(f"{headline}-{idx}"),
                created_at=ts,
                updated_at=ts,
            )
            session.add(article)
            # Flush per article so article.id is available for the sentiment FK.
            await session.flush()

            # Sentiment score correlated with headline tone: +0.3 per positive
            # keyword, -0.3 per negative keyword, plus small gaussian noise,
            # clamped to [-1, 1].
            headline_lower = headline.lower()
            pos_count = sum(1 for w in positive_words if w in headline_lower)
            neg_count = sum(1 for w in negative_words if w in headline_lower)
            base_score = 0.3 * pos_count - 0.3 * neg_count
            score = max(-1.0, min(1.0, base_score + random.gauss(0, 0.1)))

            sentiment = ArticleSentiment(
                article_id=article.id,
                ticker=ticker,
                score=round(score, 3),
                confidence=round(random.uniform(0.6, 0.95), 3),
                model_used="finbert",
                created_at=ts,
                updated_at=ts,
            )
            sentiments.append(sentiment)
            articles.append(article)

        session.add_all(sentiments)
        logger.info(
            "Added %d articles and %d sentiments", len(articles), len(sentiments)
        )

        # ---------------------------------------------------------------
        # 8. Strategy metrics — daily metrics per strategy for 30 days
        # ---------------------------------------------------------------
        metrics = []
        for strat in strategy_list:
            cum_pnl = 0.0
            trade_count = 0
            for day_offset in range(30, 0, -1):
                ts = now - timedelta(days=day_offset)
                daily_trades = random.randint(0, 3)
                trade_count += daily_trades
                daily_pnl = round(random.gauss(50, 200), 2)
                cum_pnl += daily_pnl
                win_rate = round(random.uniform(0.35, 0.75), 4)
                sharpe = round(random.gauss(1.2, 0.5), 2)

                metrics.append(
                    StrategyMetric(
                        timestamp=ts,
                        strategy_id=strat.id,
                        win_rate=win_rate,
                        total_pnl=round(cum_pnl, 2),
                        trade_count=trade_count,
                        sharpe_ratio=sharpe,
                    )
                )
        session.add_all(metrics)
        logger.info("Added %d strategy metric records", len(metrics))

        # ---------------------------------------------------------------
        # 9. Strategy weight history — a few adjustment records
        # ---------------------------------------------------------------
        weight_records = []
        reasons = [
            "Periodic performance review — increased weight due to positive Sharpe",
            "Reduced weight after string of losses",
            "Rebalanced weights to equal distribution",
            "Increased weight — strong win rate last 7 days",
            "Decreased weight — high drawdown detected",
        ]
        for strat in strategy_list:
            weight = 0.333
            # 2-4 adjustments per strategy, spaced roughly a week apart.
            for adj_idx in range(random.randint(2, 4)):
                day_offset = 30 - adj_idx * 7
                if day_offset < 1:
                    break
                ts = now - timedelta(days=day_offset)
                old_weight = weight
                # Random walk clamped to [0.1, 0.6].
                weight = round(max(0.1, min(0.6, weight + random.gauss(0, 0.05))), 3)
                weight_records.append(
                    StrategyWeightHistory(
                        strategy_id=strat.id,
                        old_weight=old_weight,
                        new_weight=weight,
                        reason=random.choice(reasons),
                        created_at=ts,
                        updated_at=ts,
                    )
                )
        session.add_all(weight_records)
        logger.info("Added %d weight history records", len(weight_records))

        # ---------------------------------------------------------------
        # Commit all data
        # ---------------------------------------------------------------
        await session.commit()
        logger.info("All sample data committed successfully")

    await engine.dispose()
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: configure logging, then run the async seeder."""
    log_format = "%(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
    asyncio.run(seed())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue