feat: learning engine — multi-armed bandit strategy weight adjustment

2026-02-22 15:43:11 +00:00 · 2026-02-22 15:43:11 +00:00 · c089bcb92c
commit c089bcb92c
parent 1d9900838d
6 changed files with 1177 additions and 0 deletions
--- a/services/learning_engine/evaluator.py
+++ b/services/learning_engine/evaluator.py
@@ -0,0 +1,120 @@
+"""Trade evaluator -- computes outcomes and attributes credit to strategies.
+
+Given a closed trade (exit), this module computes realized P&L, ROI, and
+distributes reward signals to each contributing strategy proportionally
+to its signal strength.
+"""
+
+from __future__ import annotations
+
+import logging
+from uuid import UUID
+
+from shared.schemas.learning import TradeOutcomeSchema
+
+logger = logging.getLogger(__name__)
+
+
+class TradeEvaluator:
+    """Evaluates closed trades and attributes credit to strategies."""
+
+    def evaluate_trade(
+        self,
+        trade_id: UUID,
+        entry_price: float,
+        exit_price: float,
+        qty: float,
+        direction_sign: float,
+        hold_duration_seconds: float,
+    ) -> TradeOutcomeSchema:
+        """Compute the outcome of a closed trade.
+
+        Parameters
+        ----------
+        trade_id:
+            Unique identifier of the closing trade.
+        entry_price:
+            The price at which the position was opened.
+        exit_price:
+            The price at which the position was closed.
+        qty:
+            Number of shares traded.
+        direction_sign:
+            +1.0 for long positions, -1.0 for short positions.
+        hold_duration_seconds:
+            How long the position was held, in seconds.
+
+        Returns
+        -------
+        TradeOutcomeSchema
+            The evaluated outcome including realized P&L and ROI.
+        """
+        realized_pnl = (exit_price - entry_price) * qty * direction_sign
+        # Magnitude of the cost basis: keeps the ROI sign equal to the P&L
+        # sign even if qty or entry_price is negative; zero basis -> 0.0 ROI.
+        cost_basis = abs(entry_price * qty)
+        roi_pct = (realized_pnl / cost_basis * 100.0) if cost_basis else 0.0
+
+        return TradeOutcomeSchema(
+            trade_id=trade_id,
+            hold_duration_seconds=hold_duration_seconds,
+            realized_pnl=realized_pnl,
+            roi_pct=roi_pct,
+            was_profitable=realized_pnl > 0,
+        )
+
+    def attribute_credit(
+        self,
+        outcome: TradeOutcomeSchema,
+        strategy_sources: list[str],
+    ) -> dict[str, float]:
+        """Distribute reward signal to contributing strategies.
+
+        Parses ``strategy_sources`` entries which may be formatted as either:
+        - ``"name:DIRECTION:strength"`` (full format from the ensemble)
+        - ``"name"`` (bare strategy name -- defaults to strength 1.0)
+
+        An unparseable or non-finite (NaN/inf) strength also falls back to
+        1.0, so one bad entry cannot turn every reward into NaN.
+
+        The reward signal is the trade's ROI percentage distributed
+        proportionally to each strategy's signal strength.  A strategy
+        listed more than once receives the sum of its shares.
+
+        Parameters
+        ----------
+        outcome:
+            The evaluated trade outcome.
+        strategy_sources:
+            List of strategy source strings from the signal.
+
+        Returns
+        -------
+        dict[str, float]
+            Mapping of strategy name to its reward signal; empty when there
+            are no sources or the strengths sum to zero.
+        """
+        import math  # local import: only this method needs it
+
+        # Sum strength per name so a repeated strategy is not overwritten.
+        strengths: dict[str, float] = {}
+        for source in strategy_sources or ():
+            parts = source.split(":")
+            strength = 1.0
+            if len(parts) >= 3:
+                try:
+                    parsed = float(parts[2])
+                except ValueError:
+                    parsed = math.nan
+                if math.isfinite(parsed):
+                    strength = parsed
+            strengths[parts[0]] = strengths.get(parts[0], 0.0) + strength
+
+        total_strength = sum(strengths.values())
+        if total_strength == 0:
+            return {}
+
+        return {
+            name: outcome.roi_pct * (strength / total_strength)
+            for name, strength in strengths.items()
+        }