426 lines
14 KiB
Python
426 lines
14 KiB
Python
|
|
"""Tests for the backtesting engine.
|
||
|
|
|
||
|
|
Covers:
- SimulatedBroker: slippage, commission, positions, equity
- BacktestDataLoader: chronological ordering, sentiment aggregation
- Metrics: total return, Sharpe ratio, max drawdown, win rate
- BacktestEngine: full run with buy+sell, position closing at end
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import math
|
||
|
|
from datetime import datetime, timedelta, timezone
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from backtester.config import BacktestConfig
|
||
|
|
from backtester.data_loader import BacktestDataLoader
|
||
|
|
from backtester.engine import BacktestEngine
|
||
|
|
from backtester.metrics import BacktestResult, compute_metrics
|
||
|
|
from backtester.simulated_broker import SimulatedBroker
|
||
|
|
from shared.schemas.trading import (
|
||
|
|
OrderRequest,
|
||
|
|
OrderSide,
|
||
|
|
OrderStatus,
|
||
|
|
TradeExecution,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
# ======================================================================
|
||
|
|
# Helpers
|
||
|
|
# ======================================================================
|
||
|
|
|
||
|
|
def _ts(day: int) -> datetime:
|
||
|
|
"""Return a timezone-aware datetime for 2025-01-{day}."""
|
||
|
|
return datetime(2025, 1, day, tzinfo=timezone.utc)
|
||
|
|
|
||
|
|
|
||
|
|
def _make_bar(day: int, ticker: str, close: float, *, open_: float | None = None,
              high: float | None = None, low: float | None = None, volume: float = 1000.0) -> dict:
    """Build a bar dict for the data loader.

    Args:
        day: Day of January 2025 used for the bar's ``timestamp``.
        ticker: Symbol stored under ``"ticker"``.
        close: Closing price; also the fallback for any omitted OHL field.
        open_: Opening price, or ``None`` to reuse ``close``.
        high: High price, or ``None`` to reuse ``close``.
        low: Low price, or ``None`` to reuse ``close``.
        volume: Traded volume stored under ``"volume"``.

    Returns:
        A dict with ``timestamp``, ``ticker``, ``open``, ``high``, ``low``,
        ``close`` and ``volume`` keys.
    """
    # Fall back on ``None`` only: a truthiness test (``open_ or close``) would
    # silently replace an explicitly passed 0.0 with the close price.
    return {
        "timestamp": _ts(day),
        "ticker": ticker,
        "open": close if open_ is None else open_,
        "high": close if high is None else high,
        "low": close if low is None else low,
        "close": close,
        "volume": volume,
    }
|
||
|
|
|
||
|
|
|
||
|
|
# ======================================================================
|
||
|
|
# SimulatedBroker tests
|
||
|
|
# ======================================================================
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_simulated_broker_buy_fills_with_slippage():
    """A BUY should fill at price * (1 + slippage_pct)."""
    broker = SimulatedBroker(initial_capital=100_000.0, slippage_pct=0.001)
    broker.set_current_prices({"AAPL": 100.0})

    buy_order = OrderRequest(ticker="AAPL", side=OrderSide.BUY, qty=10)
    fill = await broker.submit_order(buy_order)

    assert fill.status == OrderStatus.FILLED
    # Quoted 100.0 marked up by 0.1% slippage -> 100.1
    slipped_price = 100.0 * 1.001
    assert fill.filled_price == pytest.approx(slipped_price, abs=0.01)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_simulated_broker_sell_fills_with_slippage():
    """A SELL should fill at price * (1 - slippage_pct)."""
    broker = SimulatedBroker(initial_capital=100_000.0, slippage_pct=0.001)
    broker.set_current_prices({"AAPL": 100.0})

    # Open a position first so there is something to sell.
    buy_order = OrderRequest(ticker="AAPL", side=OrderSide.BUY, qty=10)
    await broker.submit_order(buy_order)

    sell_order = OrderRequest(ticker="AAPL", side=OrderSide.SELL, qty=10)
    fill = await broker.submit_order(sell_order)

    assert fill.status == OrderStatus.FILLED
    # Quoted 100.0 marked down by 0.1% slippage -> 99.9
    slipped_price = 100.0 * 0.999
    assert fill.filled_price == pytest.approx(slipped_price, abs=0.01)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_simulated_broker_tracks_positions():
    """A buy opens a tracked position; selling the full quantity removes it."""
    broker = SimulatedBroker(initial_capital=100_000.0, slippage_pct=0.0)
    broker.set_current_prices({"AAPL": 150.0})

    # Open: 5 shares of AAPL.
    opening_order = OrderRequest(ticker="AAPL", side=OrderSide.BUY, qty=5)
    await broker.submit_order(opening_order)
    open_positions = await broker.get_positions()
    assert len(open_positions) == 1
    assert open_positions[0].ticker == "AAPL"
    assert open_positions[0].qty == 5

    # Close: sell the entire holding.
    closing_order = OrderRequest(ticker="AAPL", side=OrderSide.SELL, qty=5)
    await broker.submit_order(closing_order)
    remaining_positions = await broker.get_positions()
    assert len(remaining_positions) == 0
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_simulated_broker_commission_deducted():
    """A buy should reduce cash by the trade notional plus the per-trade commission."""
    fee = 5.0
    broker = SimulatedBroker(
        initial_capital=100_000.0,
        slippage_pct=0.0,
        commission_per_trade=fee,
    )
    broker.set_current_prices({"TSLA": 200.0})

    # 10 shares @ 200 = 2000 notional, plus 5 commission -> 2005 total outlay.
    buy_order = OrderRequest(ticker="TSLA", side=OrderSide.BUY, qty=10)
    await broker.submit_order(buy_order)

    cash_after_buy = 100_000.0 - (200.0 * 10) - fee
    assert broker.cash == pytest.approx(cash_after_buy)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_simulated_broker_account_equity():
    """Account equity should equal cash plus the market value of open positions."""
    broker = SimulatedBroker(initial_capital=50_000.0, slippage_pct=0.0)
    broker.set_current_prices({"GOOG": 100.0})

    buy_order = OrderRequest(ticker="GOOG", side=OrderSide.BUY, qty=100)
    await broker.submit_order(buy_order)

    # Right after the buy: 40k cash + 100 shares * 100 = 50k, i.e. unchanged.
    snapshot = await broker.get_account()
    assert snapshot.equity == pytest.approx(50_000.0)

    # Mark the holding up to 110: 40k cash + 100 shares * 110 = 51k.
    broker.set_current_prices({"GOOG": 110.0})
    snapshot = await broker.get_account()
    assert snapshot.equity == pytest.approx(40_000.0 + 110.0 * 100)
|
||
|
|
|
||
|
|
|
||
|
|
# ======================================================================
|
||
|
|
# BacktestDataLoader tests
|
||
|
|
# ======================================================================
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_data_loader_chronological_order():
    """The loader should yield bars in ascending timestamp order for shuffled input."""
    # Days deliberately out of order; the close is 100 + day (103, 101, 102).
    shuffled_days = (3, 1, 2)
    bars = [_make_bar(day, "AAPL", 100.0 + day) for day in shuffled_days]
    loader = BacktestDataLoader(bars=bars)

    timestamps: list[datetime] = [
        when async for when, _ticker, _bar, _sent in loader.iterate()
    ]

    assert timestamps == sorted(timestamps)
    assert len(timestamps) == 3
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_data_loader_with_sentiment():
    """Sentiment context should aggregate records up to the current bar's timestamp."""

    def _record(day, score, confidence):
        # One raw AAPL sentiment record dated 2025-01-{day}.
        return {"timestamp": _ts(day), "ticker": "AAPL", "score": score, "confidence": confidence}

    bars = [_make_bar(2, "AAPL", 150.0), _make_bar(4, "AAPL", 155.0)]
    sentiments = [
        _record(1, 0.5, 0.8),
        _record(3, 0.9, 0.9),
        _record(5, -0.3, 0.7),  # dated after every bar
    ]
    loader = BacktestDataLoader(bars=bars, sentiments=sentiments)

    contexts = [
        sentiment async for _when, _ticker, _bar, sentiment in loader.iterate()
    ]

    # Bar on day 2 sees only the day-1 record.
    day2_context = contexts[0]
    assert day2_context is not None
    assert day2_context.article_count == 1
    assert day2_context.avg_score == pytest.approx(0.5)

    # Bar on day 4 sees the day-1 and day-3 records: (0.5 + 0.9) / 2 = 0.7.
    day4_context = contexts[1]
    assert day4_context is not None
    assert day4_context.article_count == 2
    assert day4_context.avg_score == pytest.approx(0.7)
|
||
|
|
|
||
|
|
|
||
|
|
# ======================================================================
|
||
|
|
# Metrics tests
|
||
|
|
# ======================================================================
|
||
|
|
|
||
|
|
|
||
|
|
def test_metrics_total_return():
    """Total return is (final - initial) / initial, expressed in percent."""
    equity_curve = [
        (_ts(1), 100_000.0),
        (_ts(10), 110_000.0),
    ]

    metrics = compute_metrics([], equity_curve, initial_capital=100_000.0)

    # (110k - 100k) / 100k * 100 = 10
    assert metrics.total_return == pytest.approx(10.0)
|
||
|
|
|
||
|
|
|
||
|
|
def test_metrics_sharpe_ratio():
    """Check the Sharpe ratio against a hand-computed value.

    The equity curve compounds alternating +1% / -0.5% daily returns.  The
    expected figure is the mean daily return divided by the sample standard
    deviation, scaled by sqrt(252).
    """
    daily_rets = [0.01, -0.005, 0.01, -0.005]

    # Compound the returns into an equity curve that starts on day 1.
    level = 100_000.0
    curve = [(_ts(1), level)]
    for day, ret in enumerate(daily_rets, start=2):
        level *= (1 + ret)
        curve.append((_ts(day), level))

    result = compute_metrics([], curve, initial_capital=100_000.0)

    # Reference value, computed by hand from the same returns.
    sample_size = len(daily_rets)
    mean_ret = sum(daily_rets) / sample_size
    variance = sum((r - mean_ret) ** 2 for r in daily_rets) / (sample_size - 1)
    expected_sharpe = (mean_ret / math.sqrt(variance)) * math.sqrt(252)

    assert result.sharpe_ratio == pytest.approx(expected_sharpe, rel=0.01)
|
||
|
|
|
||
|
|
|
||
|
|
def test_metrics_max_drawdown():
    """Max drawdown should be the largest peak-to-trough decline, in percent."""
    equity_levels = [
        100_000.0,
        110_000.0,  # new peak
        90_000.0,   # trough: (110k - 90k) / 110k = 18.18%
        105_000.0,  # partial recovery
    ]
    curve = [(_ts(day), level) for day, level in enumerate(equity_levels, start=1)]

    result = compute_metrics([], curve, initial_capital=100_000.0)

    expected_dd = (110_000 - 90_000) / 110_000 * 100.0
    assert result.max_drawdown_pct == pytest.approx(expected_dd, rel=0.01)
|
||
|
|
|
||
|
|
|
||
|
|
def test_metrics_win_rate():
    """Win rate is the percentage of round trips that made money."""
    opened_at = _ts(1)
    closed_at = _ts(5)

    def _fill(trade_id, ticker, side, qty, price, when):
        # One FILLED execution; only the varying fields are parameters.
        return TradeExecution(
            trade_id=trade_id,
            ticker=ticker, side=side, qty=qty, price=price,
            status=OrderStatus.FILLED, timestamp=when,
        )

    trades = [
        # AAPL round trip: buy 100 @ $10, sell 100 @ $12 -> profit
        _fill("aaaa1111-1111-1111-1111-111111111111", "AAPL", OrderSide.BUY, 100, 10.0, opened_at),
        _fill("aaaa2222-2222-2222-2222-222222222222", "AAPL", OrderSide.SELL, 100, 12.0, closed_at),
        # TSLA round trip: buy 50 @ $20, sell 50 @ $18 -> loss
        _fill("bbbb1111-1111-1111-1111-111111111111", "TSLA", OrderSide.BUY, 50, 20.0, opened_at),
        _fill("bbbb2222-2222-2222-2222-222222222222", "TSLA", OrderSide.SELL, 50, 18.0, closed_at),
    ]
    curve = [(_ts(1), 100_000.0), (_ts(5), 100_100.0)]

    result = compute_metrics(trades, curve, initial_capital=100_000.0)

    # One winner out of two round trips.
    assert result.win_rate == pytest.approx(50.0)
    assert result.trade_count == 2
|
||
|
|
|
||
|
|
|
||
|
|
# ======================================================================
|
||
|
|
# BacktestEngine tests
|
||
|
|
# ======================================================================
|
||
|
|
|
||
|
|
|
||
|
|
class _AlwaysBuyStrategy:
    """Stub strategy whose every evaluation yields a LONG signal."""

    name = "always_buy"

    async def evaluate(self, ticker, market, sentiment=None):
        """Return a LONG signal of strength 0.8 for ``ticker``.

        The signal carries the timestamp of the last entry in ``market.bars``,
        or the current UTC time when there are no bars.  ``sentiment`` is
        accepted but not used.
        """
        from shared.schemas.trading import SignalDirection, TradeSignal

        if market.bars:
            stamped_at = market.bars[-1]["timestamp"]
        else:
            stamped_at = datetime.now(tz=timezone.utc)

        return TradeSignal(
            ticker=ticker,
            direction=SignalDirection.LONG,
            strength=0.8,
            strategy_sources=[self.name],
            timestamp=stamped_at,
        )
|
||
|
|
|
||
|
|
|
||
|
|
class _AlwaysSellStrategy:
    """Stub strategy whose every evaluation yields a SHORT signal."""

    name = "always_sell"

    async def evaluate(self, ticker, market, sentiment=None):
        """Return a SHORT signal of strength 0.8 for ``ticker``.

        The signal carries the timestamp of the last entry in ``market.bars``,
        or the current UTC time when there are no bars.  ``sentiment`` is
        accepted but not used.
        """
        from shared.schemas.trading import SignalDirection, TradeSignal

        if market.bars:
            stamped_at = market.bars[-1]["timestamp"]
        else:
            stamped_at = datetime.now(tz=timezone.utc)

        return TradeSignal(
            ticker=ticker,
            direction=SignalDirection.SHORT,
            strength=0.8,
            strategy_sources=[self.name],
            timestamp=stamped_at,
        )
|
||
|
|
|
||
|
|
|
||
|
|
class _BuyThenSellStrategy:
|
||
|
|
"""Strategy that buys on bar 1 and sells on bar 3."""
|
||
|
|
|
||
|
|
name = "buy_then_sell"
|
||
|
|
|
||
|
|
def __init__(self):
|
||
|
|
self._call_count: dict[str, int] = {}
|
||
|
|
|
||
|
|
async def evaluate(self, ticker, market, sentiment=None):
|
||
|
|
from shared.schemas.trading import SignalDirection, TradeSignal
|
||
|
|
|
||
|
|
self._call_count[ticker] = self._call_count.get(ticker, 0) + 1
|
||
|
|
count = self._call_count[ticker]
|
||
|
|
|
||
|
|
if count == 1:
|
||
|
|
return TradeSignal(
|
||
|
|
ticker=ticker,
|
||
|
|
direction=SignalDirection.LONG,
|
||
|
|
strength=0.8,
|
||
|
|
strategy_sources=[self.name],
|
||
|
|
timestamp=datetime.now(tz=timezone.utc),
|
||
|
|
)
|
||
|
|
elif count == 3:
|
||
|
|
return TradeSignal(
|
||
|
|
ticker=ticker,
|
||
|
|
direction=SignalDirection.SHORT,
|
||
|
|
strength=0.8,
|
||
|
|
strategy_sources=[self.name],
|
||
|
|
timestamp=datetime.now(tz=timezone.utc),
|
||
|
|
)
|
||
|
|
return None
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_engine_runs_full_backtest():
    """Run a 3-bar rising-price scenario with the buy-then-sell strategy."""
    config = BacktestConfig(
        start_date=_ts(1),
        end_date=_ts(3),
        initial_capital=100_000.0,
        slippage_pct=0.0,
        commission_per_trade=0.0,
        signal_threshold=0.0,
        max_position_pct=0.05,
    )
    engine = BacktestEngine(config=config, strategies=[_BuyThenSellStrategy()])

    # Closes of 100, 110, 120 on days 1-3.
    closes = (100.0, 110.0, 120.0)
    bars = [_make_bar(day, "AAPL", close) for day, close in enumerate(closes, start=1)]
    loader = BacktestDataLoader(bars=bars)

    result = await engine.run(loader)

    # At least one trade must be counted and an equity curve recorded.
    assert result.trade_count >= 1
    assert len(result.equity_curve) > 0
    # Price rose 20% with no slippage or commission, so the run must not lose money.
    assert result.total_return >= 0
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_engine_closes_positions_at_end():
    """Positions still open after the last bar should be closed by the engine."""
    config = BacktestConfig(
        start_date=_ts(1),
        end_date=_ts(3),
        initial_capital=100_000.0,
        slippage_pct=0.0,
        commission_per_trade=0.0,
        signal_threshold=0.0,
        max_position_pct=0.10,
    )
    # The strategy only ever goes long, so any SELL must come from the engine.
    engine = BacktestEngine(config=config, strategies=[_AlwaysBuyStrategy()])

    closes = (100.0, 105.0, 110.0)
    bars = [_make_bar(day, "AAPL", close) for day, close in enumerate(closes, start=1)]
    loader = BacktestDataLoader(bars=bars)

    result = await engine.run(loader)

    # The trade log should hold at least one buy and one (auto-close) sell.
    buy_count = sum(1 for trade in result.trade_log if trade.side == OrderSide.BUY)
    sell_count = sum(1 for trade in result.trade_log if trade.side == OrderSide.SELL)
    assert buy_count >= 1
    assert sell_count >= 1  # auto-closed at end
|