Source code for ergodic_insurance.accuracy_validator

"""Numerical accuracy validation for Monte Carlo simulations.

This module provides tools to validate the numerical accuracy of optimized
Monte Carlo simulations against reference implementations, ensuring that
performance optimizations don't compromise result quality.

Key features:
    - High-precision reference implementations
    - Statistical validation of distributions
    - Edge case and boundary condition testing
    - Accuracy comparison metrics
    - Detailed validation reports

Example:
    >>> from ergodic_insurance.accuracy_validator import AccuracyValidator
    >>> import numpy as np
    >>>
    >>> validator = AccuracyValidator()
    >>>
    >>> # Compare optimized vs reference implementation
    >>> reference_results = np.random.normal(0.08, 0.02, 10000)
    >>> optimized_results = reference_results + np.random.normal(0, 1e-4, 10000)
    >>>
    >>> validation = validator.compare_implementations(
    ...     optimized_results, reference_results
    ... )
    >>> print(f"Accuracy: {validation.accuracy_score:.4f}")

Google-style docstrings are used throughout for Sphinx documentation.
"""

from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np
from scipy import stats

from ergodic_insurance.summary_statistics import format_quantile_key


@dataclass
class ValidationResult:
    """Results from accuracy validation."""

    accuracy_score: float  #: Overall accuracy score (0-1)
    mean_error: float = 0.0  #: Mean absolute error
    max_error: float = 0.0  #: Maximum absolute error
    relative_error: float = 0.0  #: Mean relative error
    ks_statistic: float = 0.0  #: Kolmogorov-Smirnov test statistic
    ks_pvalue: float = 0.0  #: Kolmogorov-Smirnov test p-value
    passed_tests: List[str] = field(default_factory=list)  #: List of passed validation tests
    failed_tests: List[str] = field(default_factory=list)  #: List of failed validation tests
    edge_cases: Dict[str, bool] = field(default_factory=dict)  #: Results from edge case testing

    def is_valid(self, tolerance: float = 0.01) -> bool:
        """Check if validation passes within tolerance.

        Args:
            tolerance: Maximum acceptable relative error.

        Returns:
            True if validation passes.
        """
        return (
            self.relative_error < tolerance
            and len(self.failed_tests) == 0
            and self.accuracy_score > 0.99
        )

    def summary(self) -> str:
        """Generate validation summary.

        Returns:
            Formatted summary string.
        """
        summary = f"Accuracy Validation Summary\n{'='*50}\n"
        summary += f"Accuracy Score: {self.accuracy_score:.4f}\n"
        summary += f"Mean Error: {self.mean_error:.6f}\n"
        summary += f"Max Error: {self.max_error:.6f}\n"
        summary += f"Relative Error: {self.relative_error:.2%}\n"
        summary += f"KS Test: statistic={self.ks_statistic:.4f}, p-value={self.ks_pvalue:.4f}\n"
        summary += "\nValidation Tests:\n"
        summary += f"  Passed: {len(self.passed_tests)}\n"
        summary += f"  Failed: {len(self.failed_tests)}\n"

        if self.failed_tests:
            summary += "\nFailed Tests:\n"
            for test in self.failed_tests[:5]:
                summary += f"  - {test}\n"

        if self.edge_cases:
            summary += "\nEdge Cases:\n"
            for case, passed in self.edge_cases.items():
                status = "✓" if passed else "✗"
                summary += f"  {status} {case}\n"

        return summary
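
# Illustrative use of ValidationResult (a minimal sketch; the numbers below are
# hypothetical, not produced by any simulation in this module):
#
#     result = ValidationResult(accuracy_score=0.998, relative_error=0.004)
#     result.passed_tests.append("Growth Rate Calculation: Relative error within tolerance")
#     result.is_valid(tolerance=0.01)  # True: all three conditions hold
#     print(result.summary())
#
# is_valid() requires all three conditions at once: relative_error below the
# tolerance, an empty failed_tests list, and accuracy_score above 0.99.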


class ReferenceImplementations:
    """High-precision reference implementations for validation.

    These implementations prioritize accuracy over speed and serve
    as the ground truth for validation.
    """

    @staticmethod
    def calculate_growth_rate_precise(
        final_assets: float, initial_assets: float, n_years: float
    ) -> float:
        """Calculate growth rate with high precision.

        Args:
            final_assets: Final asset value.
            initial_assets: Initial asset value.
            n_years: Number of years.

        Returns:
            Precise growth rate.
        """
        if final_assets <= 0 or initial_assets <= 0:
            return -np.inf

        # Use high precision logarithm (float128 not available on Windows, using float64)
        ratio = np.float64(final_assets) / np.float64(initial_assets)
        if ratio <= 0:
            return -np.inf

        return float(np.log(ratio) / n_years)
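
    # For example, doubling assets over ten years corresponds to a continuously
    # compounded growth rate of ln(2) / 10 ≈ 0.0693:
    #
    #     ReferenceImplementations.calculate_growth_rate_precise(20e6, 10e6, 10)
    #     # -> 0.0693... (ln(2) / 10)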

    @staticmethod
    def apply_insurance_precise(
        loss: float, attachment: float, limit: float
    ) -> Tuple[float, float]:
        """Apply insurance with precise calculations.

        Args:
            loss: Loss amount.
            attachment: Insurance attachment point.
            limit: Insurance limit.

        Returns:
            Tuple of (retained_loss, recovered_amount).
        """
        # Use high precision for edge cases (float128 not available on Windows, using float64)
        loss = np.float64(loss)
        attachment = np.float64(attachment)
        limit = np.float64(limit)

        if loss <= attachment:
            return float(loss), 0.0

        excess = loss - attachment
        recovery = min(float(excess), float(limit))
        retained = loss - recovery

        return float(retained), float(recovery)
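
    # Worked example for apply_insurance_precise (values chosen for illustration):
    # with loss=5_000, attachment=1_000, limit=3_000 the excess over the attachment
    # is 4_000, recovery is capped at the 3_000 limit, and the insured retains
    # 5_000 - 3_000 = 2_000:
    #
    #     retained, recovered = ReferenceImplementations.apply_insurance_precise(
    #         5_000, 1_000, 3_000
    #     )
    #     # retained == 2000.0, recovered == 3000.0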

    @staticmethod
    def calculate_var_precise(losses: np.ndarray, confidence: float) -> float:
        """Calculate Value at Risk with high precision.

        Args:
            losses: Array of loss amounts.
            confidence: Confidence level (e.g., 0.95).

        Returns:
            VaR at specified confidence level.
        """
        if len(losses) == 0:
            return 0.0

        # Use high precision sorting (float128 not available on Windows, using float64)
        sorted_losses = np.sort(losses.astype(np.float64))
        index = int(np.ceil(confidence * len(sorted_losses))) - 1
        index = max(0, min(index, len(sorted_losses) - 1))

        return float(sorted_losses[index])
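
    # Concrete case for the index arithmetic above: with 1_000 sorted losses and
    # confidence=0.95, index = ceil(0.95 * 1000) - 1 = 949, i.e. the 950th smallest
    # loss, so roughly 5% of simulated losses lie at or above the reported VaR.
    #
    #     losses = np.arange(1.0, 1001.0)  # 1, 2, ..., 1000
    #     ReferenceImplementations.calculate_var_precise(losses, 0.95)  # -> 950.0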

    @staticmethod
    def calculate_tvar_precise(losses: np.ndarray, confidence: float) -> float:
        """Calculate Tail Value at Risk with high precision.

        Args:
            losses: Array of loss amounts.
            confidence: Confidence level (e.g., 0.95).

        Returns:
            TVaR at specified confidence level.
        """
        if len(losses) == 0:
            return 0.0

        var = ReferenceImplementations.calculate_var_precise(losses, confidence)
        tail_losses = losses[losses >= var]

        if len(tail_losses) == 0:
            return float(var)

        return float(np.mean(tail_losses.astype(np.float64)))

    @staticmethod
    def calculate_ruin_probability_precise(paths: np.ndarray, threshold: float = 0.0) -> float:
        """Calculate ruin probability with high precision.

        Args:
            paths: Array of asset paths.
            threshold: Ruin threshold.

        Returns:
            Probability of ruin.
        """
        if len(paths) == 0:
            return 0.0

        # Check each path precisely
        ruin_count = 0
        for path in paths:
            min_value = np.min(path.astype(np.float64))
            if min_value <= threshold:
                ruin_count += 1

        return ruin_count / len(paths)
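
# A minimal sketch of exercising the reference implementations directly; the
# simulated paths and loss amounts below are synthetic and for illustration only:
#
#     rng = np.random.default_rng(0)
#     losses = rng.lognormal(mean=12, sigma=1.5, size=10_000)
#     var_99 = ReferenceImplementations.calculate_var_precise(losses, 0.99)
#     tvar_99 = ReferenceImplementations.calculate_tvar_precise(losses, 0.99)  # always >= var_99
#     paths = rng.normal(loc=10e6, scale=2e6, size=(500, 20))  # 500 asset paths, 20 years
#     ruin = ReferenceImplementations.calculate_ruin_probability_precise(paths, threshold=0.0)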


class StatisticalValidation:
    """Statistical tests for distribution validation."""

    @staticmethod
    def compare_distributions(data1: np.ndarray, data2: np.ndarray) -> Dict[str, Any]:
        """Compare two distributions statistically.

        Args:
            data1: First dataset.
            data2: Second dataset.

        Returns:
            Dictionary of statistical test results.
        """
        import warnings

        results = {}

        # Kolmogorov-Smirnov test - use asymptotic method for small samples or edge cases
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message=".*Exact calculation unsuccessful.*")
            # Force asymptotic method for very small samples or when data has many duplicates
            if (
                len(data1) < 10
                or len(data2) < 10
                or len(np.unique(data1)) < 3
                or len(np.unique(data2)) < 3
            ):
                ks_stat, ks_p = stats.ks_2samp(data1, data2, method="asymp")
            else:
                ks_stat, ks_p = stats.ks_2samp(data1, data2)
        results["ks_statistic"] = ks_stat
        results["ks_pvalue"] = ks_p
        results["ks_passes"] = bool(ks_p > 0.05)

        # Mann-Whitney U test
        mw_stat, mw_p = stats.mannwhitneyu(data1, data2, alternative="two-sided")
        results["mw_statistic"] = mw_stat
        results["mw_pvalue"] = mw_p
        results["mw_passes"] = bool(mw_p > 0.05)

        # Compare moments
        results["mean_diff"] = abs(np.mean(data1) - np.mean(data2))
        results["std_diff"] = abs(np.std(data1) - np.std(data2))
        results["skew_diff"] = abs(stats.skew(data1) - stats.skew(data2))
        results["kurtosis_diff"] = abs(stats.kurtosis(data1) - stats.kurtosis(data2))

        # Quantile comparison
        quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
        q1 = np.percentile(data1, [q * 100 for q in quantiles])
        q2 = np.percentile(data2, [q * 100 for q in quantiles])
        results["quantile_errors"] = {
            format_quantile_key(q): abs(q1[i] - q2[i]) for i, q in enumerate(quantiles)
        }

        return results
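
    # Interpreting compare_distributions (a sketch; the samples are synthetic):
    #
    #     rng = np.random.default_rng(1)
    #     a = rng.normal(0.08, 0.02, 5_000)
    #     b = rng.normal(0.08, 0.02, 5_000)
    #     res = StatisticalValidation.compare_distributions(a, b)
    #     res["ks_passes"]        # True when the KS p-value exceeds 0.05
    #     res["quantile_errors"]  # per-quantile absolute differences, keyed via format_quantile_key
    #
    # Note that "passes" means the test found no significant difference at the 5% level;
    # it is evidence of agreement, not proof of identical distributions.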

    @staticmethod
    def validate_statistical_properties(
        data: np.ndarray, expected_mean: float, expected_std: float, tolerance: float = 0.05
    ) -> Dict[str, bool]:
        """Validate statistical properties of data.

        Args:
            data: Data to validate.
            expected_mean: Expected mean value.
            expected_std: Expected standard deviation.
            tolerance: Relative tolerance for validation.

        Returns:
            Dictionary of validation results.
        """
        actual_mean = np.mean(data)
        actual_std = np.std(data)

        validations = {
            "mean_valid": bool(
                abs(actual_mean - expected_mean) / (abs(expected_mean) + 1e-10) < tolerance
            ),
            "std_valid": bool(
                abs(actual_std - expected_std) / (abs(expected_std) + 1e-10) < tolerance
            ),
            "normality_test": (
                bool(stats.normaltest(data).pvalue > 0.05) if len(data) > 20 else True
            ),
            "no_outliers": bool(np.sum(np.abs(stats.zscore(data)) > 4) / len(data) < 0.01),
        }

        return validations


class EdgeCaseTester:
    """Test edge cases and boundary conditions."""

    @staticmethod
    def test_extreme_values() -> Dict[str, bool]:
        """Test handling of extreme values.

        Returns:
            Dictionary of test results.
        """
        tests = {}
        ref = ReferenceImplementations()

        # Test zero values
        tests["zero_initial_assets"] = bool(
            ref.calculate_growth_rate_precise(100, 0, 10) == -np.inf
        )
        tests["zero_final_assets"] = bool(ref.calculate_growth_rate_precise(0, 100, 10) == -np.inf)

        # Test infinity
        tests["infinite_loss"] = bool(ref.apply_insurance_precise(np.inf, 1000, 10000)[0] == np.inf)

        # Test negative values
        tests["negative_assets"] = bool(ref.calculate_growth_rate_precise(-100, 100, 10) == -np.inf)

        # Test very large numbers
        large_num = 1e308  # Near float64 max
        retained, _recovered = ref.apply_insurance_precise(large_num, 1000, 10000)
        tests["large_number_handling"] = bool(retained > 0 and not np.isnan(retained))

        # Test very small numbers
        small_num = 1e-308  # Near float64 min
        tests["small_number_handling"] = bool(
            ref.apply_insurance_precise(small_num, 0, 1)[1] == small_num
        )

        return tests

    @staticmethod
    def test_boundary_conditions() -> Dict[str, bool]:
        """Test boundary conditions.

        Returns:
            Dictionary of test results.
        """
        tests = {}
        ref = ReferenceImplementations()

        # Test insurance boundaries
        loss = 5000
        attachment = 1000
        limit = 3000
        _retained, recovered = ref.apply_insurance_precise(loss, attachment, limit)
        tests["insurance_limit_boundary"] = bool(abs(recovered - limit) < 1e-10)

        # Test exact attachment point
        _retained2, recovered2 = ref.apply_insurance_precise(attachment, attachment, limit)
        tests["exact_attachment"] = bool(recovered2 == 0)

        # Test loss below attachment
        retained3, recovered3 = ref.apply_insurance_precise(500, attachment, limit)
        tests["below_attachment"] = bool(recovered3 == 0 and retained3 == 500)

        # Test empty arrays
        tests["empty_var"] = bool(ref.calculate_var_precise(np.array([]), 0.95) == 0)
        tests["empty_tvar"] = bool(ref.calculate_tvar_precise(np.array([]), 0.95) == 0)
        tests["empty_ruin"] = bool(ref.calculate_ruin_probability_precise(np.array([]), 0) == 0)

        return tests
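
# The two test suites can also be run standalone; each returns a name -> bool
# mapping (a minimal sketch):
#
#     extreme = EdgeCaseTester.test_extreme_values()
#     boundary = EdgeCaseTester.test_boundary_conditions()
#     failures = [name for name, ok in {**extreme, **boundary}.items() if not ok]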


class AccuracyValidator:
    """Main accuracy validation engine.

    Provides comprehensive validation of numerical accuracy
    for Monte Carlo simulations.
    """

    def __init__(self, tolerance: float = 0.01):
        """Initialize accuracy validator.

        Args:
            tolerance: Maximum acceptable relative error.
        """
        self.tolerance = tolerance
        self.reference = ReferenceImplementations()
        self.statistical = StatisticalValidation()
        self.edge_tester = EdgeCaseTester()

    def compare_implementations(
        self,
        optimized_results: np.ndarray,
        reference_results: np.ndarray,
        test_name: str = "Implementation Comparison",
    ) -> ValidationResult:
        """Compare optimized implementation against reference.

        Args:
            optimized_results: Results from optimized implementation.
            reference_results: Results from reference implementation.
            test_name: Name of the test being performed.

        Returns:
            ValidationResult with comparison metrics.
        """
        passed_tests = []
        failed_tests = []

        # Calculate errors
        abs_errors = np.abs(optimized_results - reference_results)
        mean_error = np.mean(abs_errors)
        max_error = np.max(abs_errors)

        # Relative error (avoid division by zero)
        nonzero_mask = reference_results != 0
        if np.any(nonzero_mask):
            relative_errors = abs_errors[nonzero_mask] / np.abs(reference_results[nonzero_mask])
            relative_error = np.mean(relative_errors)
        else:
            relative_error = 0.0

        # Statistical comparison
        stat_results = self.statistical.compare_distributions(optimized_results, reference_results)

        # Build test results
        if relative_error < self.tolerance:
            passed_tests.append(f"{test_name}: Relative error within tolerance")
        else:
            failed_tests.append(
                f"{test_name}: Relative error {relative_error:.4f} exceeds tolerance"
            )

        if stat_results["ks_passes"]:
            passed_tests.append("Kolmogorov-Smirnov test passed")
        else:
            failed_tests.append(
                f"Kolmogorov-Smirnov test failed (p={stat_results['ks_pvalue']:.4f})"
            )

        if stat_results["mean_diff"] < self.tolerance * abs(np.mean(reference_results)):
            passed_tests.append("Mean comparison passed")
        else:
            failed_tests.append(f"Mean difference too large: {stat_results['mean_diff']:.6f}")

        # Calculate accuracy score
        accuracy_score = 1.0 - min(relative_error, 1.0)

        return ValidationResult(
            accuracy_score=accuracy_score,
            mean_error=mean_error,
            max_error=max_error,
            relative_error=relative_error,
            ks_statistic=stat_results["ks_statistic"],
            ks_pvalue=stat_results["ks_pvalue"],
            passed_tests=passed_tests,
            failed_tests=failed_tests,
        )

    def validate_growth_rates(
        self, optimized_func: Callable, test_cases: Optional[List[Tuple]] = None
    ) -> ValidationResult:
        """Validate growth rate calculations.

        Args:
            optimized_func: Optimized growth rate function.
            test_cases: List of (final, initial, years) test cases.

        Returns:
            ValidationResult for growth rate calculations.
        """
        if test_cases is None:
            # Generate default test cases
            rng = np.random.default_rng(42)
            test_cases = [(rng.uniform(5e6, 20e6), 10e6, 10) for _ in range(1000)]

        optimized_results = []
        reference_results = []

        for final, initial, years in test_cases:
            opt_result = optimized_func(final, initial, years)
            ref_result = self.reference.calculate_growth_rate_precise(final, initial, years)
            optimized_results.append(opt_result)
            reference_results.append(ref_result)

        return self.compare_implementations(
            np.array(optimized_results), np.array(reference_results), "Growth Rate Calculation"
        )

    def validate_insurance_calculations(
        self, optimized_func: Callable, test_cases: Optional[List[Tuple]] = None
    ) -> ValidationResult:
        """Validate insurance calculations.

        Args:
            optimized_func: Optimized insurance function.
            test_cases: List of (loss, attachment, limit) test cases.

        Returns:
            ValidationResult for insurance calculations.
        """
        if test_cases is None:
            # Generate test cases
            rng = np.random.default_rng(42)
            test_cases = [(rng.exponential(100000), 50000, 500000) for _ in range(1000)]

        optimized_retained = []
        reference_retained = []

        for loss, attachment, limit in test_cases:
            opt_retained, _ = optimized_func(loss, attachment, limit)
            ref_retained, _ = self.reference.apply_insurance_precise(loss, attachment, limit)
            optimized_retained.append(opt_retained)
            reference_retained.append(ref_retained)

        return self.compare_implementations(
            np.array(optimized_retained), np.array(reference_retained), "Insurance Calculation"
        )
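
    # Sketch of validating a scalar "optimized" insurance function; the lambda below
    # is a hypothetical stand-in for a real optimized implementation, not part of this module:
    #
    #     fast_insurance = lambda loss, att, lim: (
    #         loss - min(max(loss - att, 0.0), lim),
    #         min(max(loss - att, 0.0), lim),
    #     )
    #     result = AccuracyValidator().validate_insurance_calculations(fast_insurance)
    #     result.is_valid()  # expected True, since the lambda matches the reference logic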

    def validate_risk_metrics(
        self,
        optimized_var: Callable,
        optimized_tvar: Callable,
        test_data: Optional[np.ndarray] = None,
    ) -> ValidationResult:
        """Validate risk metric calculations.

        Args:
            optimized_var: Optimized VaR function.
            optimized_tvar: Optimized TVaR function.
            test_data: Test loss data.

        Returns:
            ValidationResult for risk metrics.
        """
        if test_data is None:
            rng = np.random.default_rng(42)
            test_data = rng.lognormal(12, 1.5, 10000)

        confidence_levels = [0.9, 0.95, 0.99]
        passed_tests = []
        failed_tests = []
        total_error = 0
        test_count = 0

        for confidence in confidence_levels:
            # VaR validation
            opt_var = optimized_var(test_data, confidence)
            ref_var = self.reference.calculate_var_precise(test_data, confidence)
            var_error = abs(opt_var - ref_var) / (abs(ref_var) + 1e-10)

            if var_error < self.tolerance:
                passed_tests.append(f"VaR@{confidence:.0%} validation passed")
            else:
                failed_tests.append(f"VaR@{confidence:.0%} error: {var_error:.4f}")
            total_error += var_error
            test_count += 1

            # TVaR validation
            opt_tvar = optimized_tvar(test_data, confidence)
            ref_tvar = self.reference.calculate_tvar_precise(test_data, confidence)
            tvar_error = abs(opt_tvar - ref_tvar) / (abs(ref_tvar) + 1e-10)

            if tvar_error < self.tolerance:
                passed_tests.append(f"TVaR@{confidence:.0%} validation passed")
            else:
                failed_tests.append(f"TVaR@{confidence:.0%} error: {tvar_error:.4f}")
            total_error += tvar_error
            test_count += 1

        avg_error = total_error / test_count
        accuracy_score = 1.0 - min(avg_error, 1.0)

        return ValidationResult(
            accuracy_score=accuracy_score,
            relative_error=avg_error,
            passed_tests=passed_tests,
            failed_tests=failed_tests,
        )
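
    # Sketch for validate_risk_metrics; numpy's quantile with the "higher" method is used
    # here as a hypothetical fast VaR, with a matching tail mean as the TVaR (these
    # stand-ins are not part of the module and may differ slightly from the reference
    # order-statistic convention):
    #
    #     fast_var = lambda losses, c: float(np.quantile(losses, c, method="higher"))
    #     fast_tvar = lambda losses, c: float(np.mean(losses[losses >= fast_var(losses, c)]))
    #     report = AccuracyValidator().validate_risk_metrics(fast_var, fast_tvar)
    #     print(report.summary())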

    def run_full_validation(self) -> ValidationResult:
        """Run comprehensive validation suite.

        Returns:
            Complete ValidationResult.
        """
        passed_tests = []
        failed_tests = []

        # Test edge cases
        edge_results = self.edge_tester.test_extreme_values()
        for test_name, passed in edge_results.items():
            if passed:
                passed_tests.append(f"Edge case: {test_name}")
            else:
                failed_tests.append(f"Edge case: {test_name}")

        # Test boundary conditions
        boundary_results = self.edge_tester.test_boundary_conditions()
        for test_name, passed in boundary_results.items():
            if passed:
                passed_tests.append(f"Boundary: {test_name}")
            else:
                failed_tests.append(f"Boundary: {test_name}")

        # Calculate overall accuracy
        total_tests = len(passed_tests) + len(failed_tests)
        accuracy_score = len(passed_tests) / total_tests if total_tests > 0 else 0

        return ValidationResult(
            accuracy_score=accuracy_score,
            passed_tests=passed_tests,
            failed_tests=failed_tests,
            edge_cases={**edge_results, **boundary_results},
        )

    def generate_validation_report(self, results: List[ValidationResult]) -> str:
        """Generate comprehensive validation report.

        Args:
            results: List of validation results.

        Returns:
            Formatted validation report.
        """
        report = "ACCURACY VALIDATION REPORT\n" + "=" * 60 + "\n\n"

        for i, result in enumerate(results, 1):
            report += f"Test #{i}\n" + "-" * 30 + "\n"
            report += result.summary() + "\n\n"

        # Overall summary
        avg_accuracy = np.mean([r.accuracy_score for r in results])
        total_passed = sum(len(r.passed_tests) for r in results)
        total_failed = sum(len(r.failed_tests) for r in results)

        report += "OVERALL SUMMARY\n" + "=" * 60 + "\n"
        report += f"Average Accuracy Score: {avg_accuracy:.4f}\n"
        report += f"Total Tests Passed: {total_passed}\n"
        report += f"Total Tests Failed: {total_failed}\n"
        report += f"Success Rate: {total_passed/(total_passed+total_failed)*100:.1f}%\n"

        if avg_accuracy >= 0.99 and total_failed == 0:
            report += "\n✓ VALIDATION PASSED: All accuracy requirements met\n"
        else:
            report += "\n✗ VALIDATION FAILED: Accuracy requirements not met\n"

        return report


if __name__ == "__main__":
    # Example usage
    rng = np.random.default_rng(42)

    # Create validator
    validator = AccuracyValidator(tolerance=0.01)

    # Generate test data
    optimized = rng.normal(0.08, 0.02, 10000)
    reference = optimized + rng.normal(0, 0.0001, 10000)  # Small perturbation

    # Run validation
    result = validator.compare_implementations(optimized, reference)
    print(result.summary())

    # Run full validation suite
    full_result = validator.run_full_validation()
    print("\n" + full_result.summary())

    # Generate report
    report = validator.generate_validation_report([result, full_result])
    print("\n" + report)