trading/tests/backtester/test_kevin_backtest.py
Viktor Barzin b82014995c
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
feat(kevin-strategy): integrate expected_move into trading decision
The v2 prompt produces expected_move for every ticker mention. This
commit makes KevinStrategy.evaluate_mention USE it as a hard signal
rather than just a display field.

Four new rules, all guarded by KevinStrategyConfig knobs so the
behaviour can be turned off if it over-filters:

1) SELL + non-bearish expected_move => NO_OP
   (require_forward_for_bearish, default True). This is THE
   anti-capitulation rule — Kevin saying "I sold" without
   articulating where the stock goes next becomes NO_OP. Reactive
   sells stop translating into trades.

2) AVOID + bullish expected_move => NO_OP (don't close, don't
   blocklist). Same idea — if the LLM's forward call contradicts the
   avoid action, treat as inconsistent and skip.

3) BUY + bearish/sideways expected_move => NO_OP (schema veto).
   Catches LLM inconsistency.

4) BUY + unknown expected_move => bump min_conviction floor by
   unknown_conviction_bonus (default +0.05). Forces stronger
   conviction when there's no forward direction.

Tests: 6 new (one per rule above), 22 regression — total 28 GREEN.
Backtest stub _mention factory now defaults expected_move from
action (buy/sell/avoid maps) so existing backtest scenarios stay
green; the test_backtest_sell_mid_position_closes_early case was
the only one that needed the fix.

Side note: strategy is backward-compatible. If a mention has no
expected_move attribute (e.g. v1 stub from older code), it defaults
to UNKNOWN and the legacy code paths still work — just with the
stricter conviction floor on buys.
2026-05-28 22:45:24 +00:00

196 lines
6.3 KiB
Python

"""Tests for the mention-driven Kevin backtest mini-engine."""
from datetime import datetime, timedelta, timezone
from decimal import Decimal
import pandas as pd
import pytest
from backtester.kevin_backtest import (
KevinBacktestParams,
KevinBacktestRunner,
)
from backtester.metrics import BacktestResult
from shared.strategies.kevin import KevinStrategy, KevinStrategyConfig
class _StubPriceLoader:
    """Serve pre-built bar frames from memory in place of KevinPriceLoader.

    The loader wraps a mapping of ticker symbol to a DataFrame of bars.
    The ``"SPY"`` entry, when present, doubles as the benchmark series.
    """

    def __init__(self, bars_by_symbol: dict[str, pd.DataFrame]):
        self.bars = bars_by_symbol
        # Stays ``None`` when the mapping carries no SPY frame.
        self.spy = bars_by_symbol.get("SPY")

    async def daily_bars(self, symbol, start, end):
        """Return the stored frame for *symbol*; *start* and *end* are ignored.

        Symbols that were never registered yield an empty DataFrame.
        """
        if symbol in self.bars:
            return self.bars[symbol]
        return pd.DataFrame()

    async def is_tradable(self, symbol):
        """Report whether a bar frame was registered for *symbol*."""
        return symbol in self.bars

    async def benchmark_bars(self, start, end):
        """Return the SPY frame, or an empty DataFrame if none was supplied."""
        if self.spy is None:
            return pd.DataFrame()
        return self.spy
def _mention(symbol, action, conviction, horizon, days_ago, expected_move=None):
    """Fabricate a duck-typed ticker mention for the backtest scenarios.

    Args:
        symbol: Ticker symbol stored on the mention.
        action: Action string (e.g. ``"buy"``), exposed as ``action.value``.
        conviction: Value handed to ``Decimal``; exposed as ``conviction``.
        horizon: Time-horizon string, exposed as ``time_horizon.value``.
        days_ago: Day offset that is *added* to the fixed base timestamp
            2026-05-15 14:00 UTC (despite the name); also reused as ``id``.
        expected_move: Forward-direction string exposed as
            ``expected_move.value``. When ``None`` it is derived from
            *action*: buy -> up_mild, sell/avoid -> down_mild, anything
            else -> unknown.

    Returns:
        A dynamically created class named ``M`` (the class itself, not an
        instance) whose class attributes carry the mention fields.
    """

    def _valued(cls_name, value):
        # Minimal enum-like object: a throwaway class instance with `.value`.
        return type(cls_name, (), {"value": value})()

    if expected_move is None:
        # Choose a direction consistent with the action so backtests don't
        # trip the v2 forward-direction veto on sells.
        move_for_action = {
            "buy": "up_mild",
            "sell": "down_mild",
            "avoid": "down_mild",
        }
        if action in move_for_action:
            expected_move = move_for_action[action]
        else:
            expected_move = "unknown"

    base_time = datetime(2026, 5, 15, 14, 0, tzinfo=timezone.utc)
    fields = {
        "id": days_ago,
        "symbol": symbol,
        "action": _valued("A", action),
        "conviction": Decimal(conviction),
        "time_horizon": _valued("H", horizon),
        "expected_move": _valued("E", expected_move),
        "created_at": base_time + timedelta(days=days_ago),
    }
    return type("M", (), fields)
def _bars(symbol, start_date, prices):
    """Create a flat OHLC frame with one business-day bar per price.

    Every bar has open == high == low == close, taken from *prices*. The
    index is a UTC business-day range starting at *start_date*. *symbol*
    is accepted for call-site readability only and is not used.
    """
    index = pd.date_range(start_date, periods=len(prices), freq="B", tz="UTC")
    ohlc_columns = ("open", "high", "low", "close")
    return pd.DataFrame({column: prices for column in ohlc_columns}, index=index)
@pytest.fixture
def cfg() -> KevinStrategyConfig:
    """Provide the KevinStrategyConfig used by the backtest scenarios."""
    # Holding period per time-horizon label.
    hold_days = {
        "days": 3,
        "weeks": 5,
        "months": 10,
        "long_term": 15,
        "unspecified": 5,
    }
    config = KevinStrategyConfig(
        min_conviction=Decimal("0.6"),
        max_mention_age_hours=48 * 365,  # effectively no age filter for backtest
        base_position_pct=Decimal("0.04"),
        min_trade_usd=Decimal("500"),
        max_trade_usd=Decimal("5000"),
        max_per_ticker_usd=Decimal("7500"),
        hold_days_by_horizon=hold_days,
        avoid_closes_longs=True,
        avoid_blocks_days=7,
    )
    return config
async def test_backtest_emits_winning_trade(cfg):
    """One high-conviction BUY on a rising ticker yields a single profitable trade."""
    # Intended scenario: NVDA is entered at $100 on day 0 and exited at $110
    # on day 5 (+10%) once the 5-day "weeks" hold elapses. The exit reason
    # itself is not asserted here.
    nvda_prices = [100, 102, 104, 106, 108, 110, 112]
    spy_prices = [500, 501, 502, 503, 504, 505, 506]
    price_loader = _StubPriceLoader(
        {
            "NVDA": _bars("NVDA", "2026-05-15", nvda_prices),
            "SPY": _bars("SPY", "2026-05-15", spy_prices),
        }
    )
    params = KevinBacktestParams(
        initial_capital=Decimal("100000"),
        slippage_pct=Decimal("0.0005"),
    )
    runner = KevinBacktestRunner(KevinStrategy(cfg), price_loader)

    result = await runner.run([_mention("NVDA", "buy", "0.8", "weeks", 0)], params)

    assert isinstance(result, BacktestResult)
    assert result.trade_count == 1
    assert result.total_return_pct > 0
async def test_backtest_filters_low_conviction(cfg):
    """A BUY whose conviction sits under the configured floor produces no trade."""
    price_loader = _StubPriceLoader(
        {
            "NVDA": _bars("NVDA", "2026-05-15", [100, 105, 110, 115, 120, 125]),
            "SPY": _bars("SPY", "2026-05-15", [500] * 6),
        }
    )
    # Conviction 0.5 is below the fixture's min_conviction of 0.6.
    weak_buy = _mention("NVDA", "buy", "0.5", "weeks", 0)
    runner = KevinBacktestRunner(KevinStrategy(cfg), price_loader)

    result = await runner.run([weak_buy], KevinBacktestParams())

    assert result.trade_count == 0
async def test_backtest_dedupe_roll_extends_exit(cfg):
    """Under the "roll" dedupe policy a repeat BUY pushes the exit out."""
    # Flat prices: only the holding length matters in this scenario.
    flat_days = 20
    price_loader = _StubPriceLoader(
        {
            "NVDA": _bars("NVDA", "2026-05-15", [100] * flat_days),
            "SPY": _bars("SPY", "2026-05-15", [500] * flat_days),
        }
    )
    # Second BUY lands inside the first one's hold window.
    first_buy = _mention("NVDA", "buy", "0.7", "weeks", 0)
    repeat_buy = _mention("NVDA", "buy", "0.7", "weeks", 3)
    runner = KevinBacktestRunner(KevinStrategy(cfg), price_loader)

    result = await runner.run(
        [first_buy, repeat_buy],
        KevinBacktestParams(dedupe_policy="roll"),
    )

    # Expected exit is day 3 + 5 = 8 rather than day 0 + 5 = 5, and the two
    # mentions collapse into a single trade.
    assert result.trade_count == 1
    # NOTE(review): `>= 5` may also hold for an un-rolled 5-day hold,
    # depending on how holding_days_actual is counted — confirm the unit and
    # consider a stricter bound.
    assert result.trades[0]["holding_days_actual"] >= 5
async def test_backtest_sell_mid_position_closes_early(cfg):
    """A SELL mention arriving while a position is open closes it early."""
    # NVDA rallies, then falls away after the sell mention.
    nvda_prices = [100, 105, 110, 95, 90, 85, 80]
    price_loader = _StubPriceLoader(
        {
            "NVDA": _bars("NVDA", "2026-05-15", nvda_prices),
            "SPY": _bars("SPY", "2026-05-15", [500] * 7),
        }
    )
    opening_buy = _mention("NVDA", "buy", "0.8", "weeks", 0)
    early_sell = _mention("NVDA", "sell", "0.9", "days", 2)
    runner = KevinBacktestRunner(KevinStrategy(cfg), price_loader)

    result = await runner.run([opening_buy, early_sell], KevinBacktestParams())

    assert result.trade_count == 1
    # NOTE(review): `<= 5` may also hold for a full 5-day "weeks" hold,
    # depending on how holding_days_actual is counted — confirm the unit and
    # consider a stricter bound.
    assert result.trades[0]["holding_days_actual"] <= 5
async def test_backtest_handles_missing_bars(cfg):
    """A mention for a ticker with no price data is skipped without a trade."""
    # Only the benchmark is loaded; NVDA is deliberately absent.
    price_loader = _StubPriceLoader(
        {"SPY": _bars("SPY", "2026-05-15", [500] * 5)}
    )
    orphan_buy = _mention("NVDA", "buy", "0.8", "weeks", 0)
    runner = KevinBacktestRunner(KevinStrategy(cfg), price_loader)

    result = await runner.run([orphan_buy], KevinBacktestParams())

    # No bars for the symbol, so no trade may be recorded.
    assert result.trade_count == 0
async def test_backtest_computes_alpha_vs_spy(cfg):
    """A gain against a flat benchmark is reported as positive alpha."""
    # NVDA steps up 10% late in the series while SPY never moves.
    nvda_prices = [100] * 5 + [110] * 2
    price_loader = _StubPriceLoader(
        {
            "NVDA": _bars("NVDA", "2026-05-15", nvda_prices),
            "SPY": _bars("SPY", "2026-05-15", [500] * 7),
        }
    )
    params = KevinBacktestParams(initial_capital=Decimal("100000"))
    runner = KevinBacktestRunner(KevinStrategy(cfg), price_loader)

    result = await runner.run([_mention("NVDA", "buy", "0.8", "weeks", 0)], params)

    alpha = result.alpha_vs_spy_pct
    assert alpha is not None
    assert alpha > 0