# trading/services/learning_engine/evaluator.py

"""Trade evaluator -- computes outcomes and attributes credit to strategies.

Given a closed trade (exit), this module computes realized P&L, ROI, and
distributes reward signals to each contributing strategy proportionally
to its signal strength.
"""

from __future__ import annotations

import logging
import math
from uuid import UUID

from shared.schemas.learning import TradeOutcomeSchema

logger = logging.getLogger(__name__)


class TradeEvaluator:
    """Evaluates closed trades and attributes credit to strategies."""

    def evaluate_trade(
        self,
        trade_id: UUID,
        entry_price: float,
        exit_price: float,
        qty: float,
        direction_sign: float,
        hold_duration_seconds: float,
    ) -> TradeOutcomeSchema:
        """Compute the outcome of a closed trade.

        Parameters
        ----------
        trade_id:
            Unique identifier of the closing trade.
        entry_price:
            The price at which the position was opened.
        exit_price:
            The price at which the position was closed.
        qty:
            Number of shares traded.
        direction_sign:
            +1.0 for long positions, -1.0 for short positions.
        hold_duration_seconds:
            How long the position was held, in seconds.

        Returns
        -------
        TradeOutcomeSchema
            The evaluated outcome including realized P&L and ROI.
            ``roi_pct`` is ``realized_pnl`` as a percentage of the absolute
            cost basis (``|entry_price * qty|``), or ``0.0`` when the cost
            basis is zero. ``was_profitable`` is true only for a strictly
            positive ``realized_pnl``.
        """
        realized_pnl = (exit_price - entry_price) * qty * direction_sign

        # Use the magnitude of the cost basis so that the sign of roi_pct
        # always agrees with the sign of realized_pnl (and therefore with
        # was_profitable), even if entry_price or qty is negative.
        cost_basis = abs(entry_price * qty)
        roi_pct = (realized_pnl / cost_basis * 100.0) if cost_basis != 0 else 0.0
        was_profitable = realized_pnl > 0

        return TradeOutcomeSchema(
            trade_id=trade_id,
            hold_duration_seconds=hold_duration_seconds,
            realized_pnl=realized_pnl,
            roi_pct=roi_pct,
            was_profitable=was_profitable,
        )

    def attribute_credit(
        self,
        outcome: TradeOutcomeSchema,
        strategy_sources: list[str],
    ) -> dict[str, float]:
        """Distribute reward signal to contributing strategies.

        Parses ``strategy_sources`` entries which may be formatted as either:
        - ``"name:DIRECTION:strength"`` (full format from the ensemble)
        - ``"name"`` (bare strategy name -- defaults to strength 1.0)

        A strength that cannot be parsed as a float, or that parses to a
        non-finite value (``nan``/``inf``), also defaults to 1.0.

        The reward signal is the trade's ROI percentage distributed
        proportionally to each strategy's signal strength. When the same
        strategy name appears in several entries, its shares are summed so
        that the returned rewards always add up to ``outcome.roi_pct``.

        Parameters
        ----------
        outcome:
            The evaluated trade outcome; only ``roi_pct`` is read.
        strategy_sources:
            List of strategy source strings from the signal.

        Returns
        -------
        dict[str, float]
            Mapping of strategy name to its reward signal. Empty when
            ``strategy_sources`` is empty or the strengths sum to zero.
        """
        if not strategy_sources:
            return {}

        # Parse (name, strength) pairs from the source strings.
        parsed: list[tuple[str, float]] = []
        for source in strategy_sources:
            parts = source.split(":")
            strength = 1.0
            if len(parts) >= 3:
                try:
                    candidate = float(parts[2])
                except ValueError:
                    candidate = 1.0
                # float() accepts "nan"/"inf"; letting those through would
                # turn every reward into NaN, so treat them as malformed.
                strength = candidate if math.isfinite(candidate) else 1.0
            parsed.append((parts[0], strength))

        # Total strength is the denominator for proportional distribution.
        total_strength = sum(strength for _, strength in parsed)
        if total_strength == 0:
            return {}

        # Accumulate (rather than overwrite) so duplicate strategy names
        # keep every share that was counted in total_strength.
        rewards: dict[str, float] = {}
        for name, strength in parsed:
            proportion = strength / total_strength
            rewards[name] = rewards.get(name, 0.0) + outcome.roi_pct * proportion

        return rewards