# Source code for ergodic_insurance.validation_metrics
"""Validation metrics for walk-forward analysis and strategy backtesting.
This module provides performance metrics and comparison tools for evaluating
insurance strategies across training and testing periods in walk-forward validation.
Example:
>>> from validation_metrics import ValidationMetrics, MetricCalculator
>>> import numpy as np
>>> # Calculate metrics for a strategy's performance
>>> returns = np.random.normal(0.08, 0.02, 1000)
>>> losses = np.random.exponential(100000, 1000)
>>>
>>> calculator = MetricCalculator()
>>> metrics = calculator.calculate_metrics(
... returns=returns,
... losses=losses,
... final_assets=10000000
... )
>>>
>>> print(f"ROE: {metrics.roe:.2%}")
>>> print(f"Sharpe Ratio: {metrics.sharpe_ratio:.2f}")
"""
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from scipy import stats
[docs]
@dataclass
class ValidationMetrics:
"""Container for validation performance metrics.
Attributes:
roe: Return on equity (annualized)
ruin_probability: Probability of insolvency
growth_rate: Compound annual growth rate
volatility: Standard deviation of returns
sharpe_ratio: Risk-adjusted return metric
max_drawdown: Maximum peak-to-trough decline
var_95: Value at Risk at 95% confidence
cvar_95: Conditional Value at Risk at 95% confidence
win_rate: Percentage of profitable periods
profit_factor: Ratio of gross profits to gross losses
recovery_time: Average time to recover from drawdown
stability: R-squared of equity curve
"""
roe: float
ruin_probability: float
growth_rate: float
volatility: float
sharpe_ratio: float = 0.0
max_drawdown: float = 0.0
var_95: float = 0.0
cvar_95: float = 0.0
win_rate: float = 0.0
profit_factor: float = 0.0
recovery_time: float = 0.0
stability: float = 0.0
[docs]
def to_dict(self) -> Dict[str, float]:
"""Convert metrics to dictionary.
Returns:
Dictionary of metric values.
"""
return {
"roe": self.roe,
"ruin_probability": self.ruin_probability,
"growth_rate": self.growth_rate,
"volatility": self.volatility,
"sharpe_ratio": self.sharpe_ratio,
"max_drawdown": self.max_drawdown,
"var_95": self.var_95,
"cvar_95": self.cvar_95,
"win_rate": self.win_rate,
"profit_factor": self.profit_factor,
"recovery_time": self.recovery_time,
"stability": self.stability,
}
[docs]
def compare(self, other: "ValidationMetrics") -> Dict[str, float]:
"""Compare metrics with another set.
Args:
other: Metrics to compare against.
Returns:
Dictionary of percentage differences.
"""
comparisons = {}
for key, value in self.to_dict().items():
other_value = getattr(other, key)
if other_value != 0:
comparisons[f"{key}_diff"] = (value - other_value) / abs(other_value)
else:
comparisons[f"{key}_diff"] = 0.0 if value == 0 else float("inf")
return comparisons
[docs]
@dataclass
class StrategyPerformance:
"""Performance tracking for a single strategy.
Attributes:
strategy_name: Name of the strategy
in_sample_metrics: Metrics from training period
out_sample_metrics: Metrics from testing period
degradation: Performance degradation from in-sample to out-sample
overfitting_score: Degree of overfitting (0 = none, 1 = severe)
consistency_score: Consistency across multiple windows
metadata: Additional strategy-specific data
"""
strategy_name: str
in_sample_metrics: Optional[ValidationMetrics] = None
out_sample_metrics: Optional[ValidationMetrics] = None
degradation: Dict[str, float] = field(default_factory=dict)
overfitting_score: float = 0.0
consistency_score: float = 0.0
metadata: Dict[str, Any] = field(default_factory=dict)
[docs]
def calculate_degradation(self):
"""Calculate performance degradation from in-sample to out-of-sample."""
if self.in_sample_metrics and self.out_sample_metrics:
self.degradation = self.out_sample_metrics.compare(self.in_sample_metrics)
# Calculate overfitting score based on key metrics
key_metrics = ["roe", "sharpe_ratio", "growth_rate"]
degradations = [abs(self.degradation.get(f"{m}_diff", 0)) for m in key_metrics]
self.overfitting_score = float(np.mean(degradations))
[docs]
def to_dataframe(self) -> pd.DataFrame:
"""Convert performance to DataFrame for reporting.
Returns:
DataFrame with performance metrics.
"""
data = []
if self.in_sample_metrics:
row: Dict[str, Any] = {"period": "in_sample", "strategy": self.strategy_name}
metrics_dict = self.in_sample_metrics.to_dict()
row.update({k: str(v) if isinstance(v, float) else v for k, v in metrics_dict.items()})
data.append(row)
if self.out_sample_metrics:
row2: Dict[str, Any] = {"period": "out_sample", "strategy": self.strategy_name}
metrics_dict = self.out_sample_metrics.to_dict()
row2.update({k: str(v) if isinstance(v, float) else v for k, v in metrics_dict.items()})
data.append(row2)
return pd.DataFrame(data) if data else pd.DataFrame()
[docs]
class MetricCalculator:
"""Calculator for performance metrics from simulation results."""
def __init__(self, risk_free_rate: float = 0.02):
"""Initialize metric calculator.
Args:
risk_free_rate: Risk-free rate for Sharpe ratio calculation.
"""
self.risk_free_rate = risk_free_rate
[docs]
def calculate_metrics( # pylint: disable=too-many-locals
self,
returns: np.ndarray,
losses: Optional[np.ndarray] = None,
final_assets: Optional[np.ndarray] = None,
initial_assets: float = 10000000,
n_years: Optional[int] = None,
) -> ValidationMetrics:
"""Calculate comprehensive performance metrics.
Args:
returns: Array of period returns
losses: Array of loss amounts (optional)
final_assets: Array of final asset values (optional)
initial_assets: Initial asset value
n_years: Number of years for annualization
Returns:
ValidationMetrics object with calculated metrics.
"""
# Basic return metrics
roe = float(np.mean(returns))
volatility = float(np.std(returns))
# Growth rate
if final_assets is not None and len(final_assets) > 0:
if n_years:
growth_rate = float(np.mean((final_assets / initial_assets) ** (1 / n_years) - 1))
else:
growth_rate = float(np.mean(final_assets / initial_assets - 1))
else:
growth_rate = roe
# Risk metrics
sharpe_ratio = (roe - self.risk_free_rate) / volatility if volatility > 0 else 0.0
# Drawdown analysis
if len(returns) > 1:
cumulative = np.cumprod(1 + returns)
running_max = np.maximum.accumulate(cumulative)
drawdown = (cumulative - running_max) / running_max
max_drawdown = float(abs(np.min(drawdown)))
else:
max_drawdown = 0.0
# Value at Risk
var_95 = float(np.percentile(returns, 5))
cvar_95 = (
float(np.mean(returns[returns <= var_95]))
if len(returns[returns <= var_95]) > 0
else var_95
)
# Win rate and profit factor
positive_returns = returns[returns > 0]
negative_returns = returns[returns < 0]
win_rate = len(positive_returns) / len(returns) if len(returns) > 0 else 0.0
if len(negative_returns) > 0:
profit_factor = abs(np.sum(positive_returns) / np.sum(negative_returns))
else:
profit_factor = float("inf") if len(positive_returns) > 0 else 1.0
# Ruin probability
if final_assets is not None and len(final_assets) > 0:
ruin_probability = float(np.mean(final_assets <= 0))
else:
ruin_probability = 0.0
# Stability (R-squared of cumulative returns)
if len(returns) > 2:
cumulative = np.cumprod(1 + returns)
x = np.arange(len(cumulative))
_slope, _intercept, r_value, _, _ = stats.linregress(x, np.log(cumulative))
stability = r_value**2
else:
stability = 0.0
return ValidationMetrics(
roe=roe,
ruin_probability=ruin_probability,
growth_rate=growth_rate,
volatility=volatility,
sharpe_ratio=sharpe_ratio,
max_drawdown=max_drawdown,
var_95=var_95,
cvar_95=cvar_95,
win_rate=win_rate,
profit_factor=profit_factor,
recovery_time=0.0, # Would require more detailed drawdown analysis
stability=stability,
)
[docs]
def calculate_rolling_metrics(
self, returns: np.ndarray, window_size: int = 252
) -> pd.DataFrame:
"""Calculate rolling window metrics.
Args:
returns: Array of returns
window_size: Size of rolling window
Returns:
DataFrame with rolling metrics.
"""
n_windows = len(returns) - window_size + 1
metrics_list = []
for i in range(n_windows):
window_returns = returns[i : i + window_size]
metrics = self.calculate_metrics(window_returns)
metrics_dict = metrics.to_dict()
metrics_dict["window_start"] = i
metrics_dict["window_end"] = i + window_size
metrics_list.append(metrics_dict)
return pd.DataFrame(metrics_list)
[docs]
class PerformanceTargets:
"""User-defined performance targets for strategy evaluation.
Attributes:
min_roe: Minimum acceptable ROE
max_ruin_probability: Maximum acceptable ruin probability
min_sharpe_ratio: Minimum acceptable Sharpe ratio
max_drawdown: Maximum acceptable drawdown
min_growth_rate: Minimum acceptable growth rate
"""
def __init__(
self,
min_roe: Optional[float] = None,
max_ruin_probability: Optional[float] = None,
min_sharpe_ratio: Optional[float] = None,
max_drawdown: Optional[float] = None,
min_growth_rate: Optional[float] = None,
):
"""Initialize performance targets.
Args:
min_roe: Minimum ROE target
max_ruin_probability: Maximum ruin probability target
min_sharpe_ratio: Minimum Sharpe ratio target
max_drawdown: Maximum drawdown target
min_growth_rate: Minimum growth rate target
"""
self.min_roe = min_roe
self.max_ruin_probability = max_ruin_probability
self.min_sharpe_ratio = min_sharpe_ratio
self.max_drawdown = max_drawdown
self.min_growth_rate = min_growth_rate
[docs]
def evaluate(self, metrics: ValidationMetrics) -> Tuple[bool, List[str]]:
"""Evaluate metrics against targets.
Args:
metrics: Metrics to evaluate
Returns:
Tuple of (meets_all_targets, list_of_failures)
"""
failures = []
if self.min_roe is not None and metrics.roe < self.min_roe:
failures.append(f"ROE {metrics.roe:.2%} < target {self.min_roe:.2%}")
if (
self.max_ruin_probability is not None
and metrics.ruin_probability > self.max_ruin_probability
):
failures.append(
f"Ruin probability {metrics.ruin_probability:.2%} > target {self.max_ruin_probability:.2%}"
)
if self.min_sharpe_ratio is not None and metrics.sharpe_ratio < self.min_sharpe_ratio:
failures.append(
f"Sharpe ratio {metrics.sharpe_ratio:.2f} < target {self.min_sharpe_ratio:.2f}"
)
if self.max_drawdown is not None and metrics.max_drawdown > self.max_drawdown:
failures.append(
f"Max drawdown {metrics.max_drawdown:.2%} > target {self.max_drawdown:.2%}"
)
if self.min_growth_rate is not None and metrics.growth_rate < self.min_growth_rate:
failures.append(
f"Growth rate {metrics.growth_rate:.2%} < target {self.min_growth_rate:.2%}"
)
return len(failures) == 0, failures