"""Numerical accuracy validation for Monte Carlo simulations.
This module provides tools to validate the numerical accuracy of optimized
Monte Carlo simulations against reference implementations, ensuring that
performance optimizations don't compromise result quality.
Key features:
- High-precision reference implementations
- Statistical validation of distributions
- Edge case and boundary condition testing
- Accuracy comparison metrics
- Detailed validation reports
Example:
>>> from accuracy_validator import AccuracyValidator
>>> import numpy as np
>>>
>>> validator = AccuracyValidator()
>>>
>>> # Compare optimized vs reference implementation
>>> optimized_results = np.random.normal(0.08, 0.02, 10000)
>>> reference_results = np.random.normal(0.08, 0.02, 10000)
>>>
>>> validation = validator.compare_implementations(
... optimized_results, reference_results
... )
>>> print(f"Accuracy: {validation.accuracy_score:.4f}")
Google-style docstrings are used throughout for Sphinx documentation.
"""
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple
import numpy as np
from scipy import stats
from ergodic_insurance.summary_statistics import format_quantile_key
@dataclass
class ValidationResult:
"""Results from accuracy validation."""
accuracy_score: float #: Overall accuracy score (0-1)
mean_error: float = 0.0 #: Mean absolute error
max_error: float = 0.0 #: Maximum absolute error
relative_error: float = 0.0 #: Mean relative error
ks_statistic: float = 0.0 #: Kolmogorov-Smirnov test statistic
ks_pvalue: float = 0.0 #: Kolmogorov-Smirnov test p-value
passed_tests: List[str] = field(default_factory=list) #: List of passed validation tests
failed_tests: List[str] = field(default_factory=list) #: List of failed validation tests
edge_cases: Dict[str, bool] = field(default_factory=dict) #: Results from edge case testing
def is_valid(self, tolerance: float = 0.01) -> bool:
"""Check if validation passes within tolerance.
Args:
tolerance: Maximum acceptable relative error.
Returns:
True if validation passes.
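Example (illustrative; the scores below are arbitrary):
>>> ValidationResult(accuracy_score=0.999, relative_error=0.005).is_valid()
True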
"""
return (
self.relative_error < tolerance
and len(self.failed_tests) == 0
and self.accuracy_score > 0.99
)
def summary(self) -> str:
"""Generate validation summary.
Returns:
Formatted summary string.
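Example (illustrative; only the header line is shown):
>>> ValidationResult(accuracy_score=1.0).summary().splitlines()[0]
'Accuracy Validation Summary'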
"""
summary = f"Accuracy Validation Summary\n{'='*50}\n"
summary += f"Accuracy Score: {self.accuracy_score:.4f}\n"
summary += f"Mean Error: {self.mean_error:.6f}\n"
summary += f"Max Error: {self.max_error:.6f}\n"
summary += f"Relative Error: {self.relative_error:.2%}\n"
summary += f"KS Test: statistic={self.ks_statistic:.4f}, p-value={self.ks_pvalue:.4f}\n"
summary += "\nValidation Tests:\n"
summary += f" Passed: {len(self.passed_tests)}\n"
summary += f" Failed: {len(self.failed_tests)}\n"
if self.failed_tests:
summary += "\nFailed Tests:\n"
for test in self.failed_tests[:5]:
summary += f" - {test}\n"
if self.edge_cases:
summary += "\nEdge Cases:\n"
for case, passed in self.edge_cases.items():
status = "✓" if passed else "✗"
summary += f" {status} {case}\n"
return summary
class ReferenceImplementations:
"""High-precision reference implementations for validation.
These implementations prioritize accuracy over speed and serve
as the ground truth for validation.
"""
@staticmethod
def calculate_growth_rate_precise(
final_assets: float, initial_assets: float, n_years: float
) -> float:
"""Calculate growth rate with high precision.
Args:
final_assets: Final asset value.
initial_assets: Initial asset value.
n_years: Number of years.
Returns:
Precise growth rate.
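Example (illustrative; assets double over ten years):
>>> round(ReferenceImplementations.calculate_growth_rate_precise(20e6, 10e6, 10), 4)
0.0693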
"""
if final_assets <= 0 or initial_assets <= 0:
return -np.inf
# Use high precision logarithm (float128 not available on Windows, using float64)
ratio = np.float64(final_assets) / np.float64(initial_assets)
if ratio <= 0:
return -np.inf
return float(np.log(ratio) / n_years)
@staticmethod
def apply_insurance_precise(
loss: float, attachment: float, limit: float
) -> Tuple[float, float]:
"""Apply insurance with precise calculations.
Args:
loss: Loss amount.
attachment: Insurance attachment point.
limit: Insurance limit.
Returns:
Tuple of (retained_loss, recovered_amount).
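Example (illustrative; a 5M loss applied to a 3M limit attaching at 1M):
>>> ReferenceImplementations.apply_insurance_precise(5_000_000, 1_000_000, 3_000_000)
(2000000.0, 3000000.0)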
"""
# Use high precision for edge cases (float128 not available on Windows, using float64)
loss = np.float64(loss)
attachment = np.float64(attachment)
limit = np.float64(limit)
if loss <= attachment:
return float(loss), 0.0
excess = loss - attachment
recovery = min(float(excess), float(limit))
retained = loss - recovery
return float(retained), float(recovery)
@staticmethod
def calculate_var_precise(losses: np.ndarray, confidence: float) -> float:
"""Calculate Value at Risk with high precision.
Args:
losses: Array of loss amounts.
confidence: Confidence level (e.g., 0.95).
Returns:
VaR at specified confidence level.
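Example (illustrative; losses of 1 through 100):
>>> ReferenceImplementations.calculate_var_precise(np.arange(1.0, 101.0), 0.95)
95.0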
"""
if len(losses) == 0:
return 0.0
# Use high precision sorting (float128 not available on Windows, using float64)
sorted_losses = np.sort(losses.astype(np.float64))
index = int(np.ceil(confidence * len(sorted_losses))) - 1
index = max(0, min(index, len(sorted_losses) - 1))
return float(sorted_losses[index])
@staticmethod
def calculate_tvar_precise(losses: np.ndarray, confidence: float) -> float:
"""Calculate Tail Value at Risk with high precision.
Args:
losses: Array of loss amounts.
confidence: Confidence level (e.g., 0.95).
Returns:
TVaR at specified confidence level.
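Example (illustrative; losses of 1 through 100, averaging the tail at or above the 95% VaR):
>>> ReferenceImplementations.calculate_tvar_precise(np.arange(1.0, 101.0), 0.95)
97.5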
"""
if len(losses) == 0:
return 0.0
var = ReferenceImplementations.calculate_var_precise(losses, confidence)
tail_losses = losses[losses >= var]
if len(tail_losses) == 0:
return float(var)
return float(np.mean(tail_losses.astype(np.float64)))
@staticmethod
def calculate_ruin_probability_precise(paths: np.ndarray, threshold: float = 0.0) -> float:
"""Calculate ruin probability with high precision.
Args:
paths: Array of asset paths.
threshold: Ruin threshold.
Returns:
Probability of ruin.
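Example (illustrative; one of two paths dips below zero):
>>> paths = np.array([[100.0, 40.0, -5.0], [100.0, 80.0, 60.0]])
>>> ReferenceImplementations.calculate_ruin_probability_precise(paths)
0.5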
"""
if len(paths) == 0:
return 0.0
# Check each path precisely
ruin_count = 0
for path in paths:
min_value = np.min(path.astype(np.float64))
if min_value <= threshold:
ruin_count += 1
return ruin_count / len(paths)
class StatisticalValidation:
"""Statistical tests for distribution validation."""
@staticmethod
def compare_distributions(data1: np.ndarray, data2: np.ndarray) -> Dict[str, Any]:
"""Compare two distributions statistically.
Args:
data1: First dataset.
data2: Second dataset.
Returns:
Dictionary of statistical test results.
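Example (illustrative; comparing a sample against itself, so the KS statistic is zero):
>>> data = np.linspace(0.0, 1.0, 200)
>>> res = StatisticalValidation.compare_distributions(data, data)
>>> float(res["ks_statistic"]), res["ks_passes"]
(0.0, True)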
"""
import warnings
results = {}
# Kolmogorov-Smirnov test - use asymptotic method for small samples or edge cases
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message=".*Exact calculation unsuccessful.*")
# Force asymptotic method for very small samples or when data has many duplicates
if (
len(data1) < 10
or len(data2) < 10
or len(np.unique(data1)) < 3
or len(np.unique(data2)) < 3
):
ks_stat, ks_p = stats.ks_2samp(data1, data2, method="asymp")
else:
ks_stat, ks_p = stats.ks_2samp(data1, data2)
results["ks_statistic"] = ks_stat
results["ks_pvalue"] = ks_p
results["ks_passes"] = bool(ks_p > 0.05)
# Mann-Whitney U test
mw_stat, mw_p = stats.mannwhitneyu(data1, data2, alternative="two-sided")
results["mw_statistic"] = mw_stat
results["mw_pvalue"] = mw_p
results["mw_passes"] = bool(mw_p > 0.05)
# Compare moments
results["mean_diff"] = abs(np.mean(data1) - np.mean(data2))
results["std_diff"] = abs(np.std(data1) - np.std(data2))
results["skew_diff"] = abs(stats.skew(data1) - stats.skew(data2))
results["kurtosis_diff"] = abs(stats.kurtosis(data1) - stats.kurtosis(data2))
# Quantile comparison
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
q1 = np.percentile(data1, [q * 100 for q in quantiles])
q2 = np.percentile(data2, [q * 100 for q in quantiles])
results["quantile_errors"] = {
format_quantile_key(q): abs(q1[i] - q2[i]) for i, q in enumerate(quantiles)
}
return results
@staticmethod
def validate_statistical_properties(
data: np.ndarray, expected_mean: float, expected_std: float, tolerance: float = 0.05
) -> Dict[str, bool]:
"""Validate statistical properties of data.
Args:
data: Data to validate.
expected_mean: Expected mean value.
expected_std: Expected standard deviation.
tolerance: Relative tolerance for validation.
Returns:
Dictionary of validation results.
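Example (illustrative; seed and sample size are arbitrary):
>>> rng = np.random.default_rng(7)
>>> data = rng.normal(10.0, 2.0, 5000)
>>> checks = StatisticalValidation.validate_statistical_properties(data, 10.0, 2.0)
>>> checks["mean_valid"] and checks["std_valid"]
True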
"""
actual_mean = np.mean(data)
actual_std = np.std(data)
validations = {
"mean_valid": bool(
abs(actual_mean - expected_mean) / (abs(expected_mean) + 1e-10) < tolerance
),
"std_valid": bool(
abs(actual_std - expected_std) / (abs(expected_std) + 1e-10) < tolerance
),
"normality_test": (
bool(stats.normaltest(data).pvalue > 0.05) if len(data) > 20 else True
),
"no_outliers": bool(np.sum(np.abs(stats.zscore(data)) > 4) / len(data) < 0.01),
}
return validations
class EdgeCaseTester:
"""Test edge cases and boundary conditions."""
@staticmethod
def test_extreme_values() -> Dict[str, bool]:
"""Test handling of extreme values.
Returns:
Dictionary of test results.
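Example (illustrative; every extreme-value check is expected to pass):
>>> all(EdgeCaseTester.test_extreme_values().values())
True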
"""
tests = {}
ref = ReferenceImplementations()
# Test zero values
tests["zero_initial_assets"] = bool(
ref.calculate_growth_rate_precise(100, 0, 10) == -np.inf
)
tests["zero_final_assets"] = bool(ref.calculate_growth_rate_precise(0, 100, 10) == -np.inf)
# Test infinity
tests["infinite_loss"] = bool(ref.apply_insurance_precise(np.inf, 1000, 10000)[0] == np.inf)
# Test negative values
tests["negative_assets"] = bool(ref.calculate_growth_rate_precise(-100, 100, 10) == -np.inf)
# Test very large numbers
large_num = 1e308 # Near float64 max
retained, _recovered = ref.apply_insurance_precise(large_num, 1000, 10000)
tests["large_number_handling"] = bool(retained > 0 and not np.isnan(retained))
# Test very small numbers
small_num = 1e-308 # Near float64 min
tests["small_number_handling"] = bool(
ref.apply_insurance_precise(small_num, 0, 1)[1] == small_num
)
return tests
@staticmethod
def test_boundary_conditions() -> Dict[str, bool]:
"""Test boundary conditions.
Returns:
Dictionary of test results.
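Example (illustrative; every boundary check is expected to pass):
>>> all(EdgeCaseTester.test_boundary_conditions().values())
True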
"""
tests = {}
ref = ReferenceImplementations()
# Test insurance boundaries
loss = 5000
attachment = 1000
limit = 3000
_retained, recovered = ref.apply_insurance_precise(loss, attachment, limit)
tests["insurance_limit_boundary"] = bool(abs(recovered - limit) < 1e-10)
# Test exact attachment point
_retained2, recovered2 = ref.apply_insurance_precise(attachment, attachment, limit)
tests["exact_attachment"] = bool(recovered2 == 0)
# Test loss below attachment
retained3, recovered3 = ref.apply_insurance_precise(500, attachment, limit)
tests["below_attachment"] = bool(recovered3 == 0 and retained3 == 500)
# Test empty arrays
tests["empty_var"] = bool(ref.calculate_var_precise(np.array([]), 0.95) == 0)
tests["empty_tvar"] = bool(ref.calculate_tvar_precise(np.array([]), 0.95) == 0)
tests["empty_ruin"] = bool(ref.calculate_ruin_probability_precise(np.array([]), 0) == 0)
return tests
class AccuracyValidator:
"""Main accuracy validation engine.
Provides comprehensive validation of numerical accuracy for
Monte Carlo simulations.
"""
def __init__(self, tolerance: float = 0.01):
"""Initialize accuracy validator.
Args:
tolerance: Maximum acceptable relative error.
"""
self.tolerance = tolerance
self.reference = ReferenceImplementations()
self.statistical = StatisticalValidation()
self.edge_tester = EdgeCaseTester()
def compare_implementations(
self,
optimized_results: np.ndarray,
reference_results: np.ndarray,
test_name: str = "Implementation Comparison",
) -> ValidationResult:
"""Compare optimized implementation against reference.
Args:
optimized_results: Results from optimized implementation.
reference_results: Results from reference implementation.
test_name: Name of the test being performed.
Returns:
ValidationResult with comparison metrics.
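Example (illustrative; identical inputs give a perfect score):
>>> validator = AccuracyValidator()
>>> data = np.linspace(0.01, 0.10, 1000)
>>> result = validator.compare_implementations(data, data.copy())
>>> float(result.accuracy_score), result.failed_tests
(1.0, [])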
"""
passed_tests = []
failed_tests = []
# Calculate errors
abs_errors = np.abs(optimized_results - reference_results)
mean_error = np.mean(abs_errors)
max_error = np.max(abs_errors)
# Relative error (avoid division by zero)
nonzero_mask = reference_results != 0
if np.any(nonzero_mask):
relative_errors = abs_errors[nonzero_mask] / np.abs(reference_results[nonzero_mask])
relative_error = np.mean(relative_errors)
else:
relative_error = 0.0
# Statistical comparison
stat_results = self.statistical.compare_distributions(optimized_results, reference_results)
# Build test results
if relative_error < self.tolerance:
passed_tests.append(f"{test_name}: Relative error within tolerance")
else:
failed_tests.append(
f"{test_name}: Relative error {relative_error:.4f} exceeds tolerance"
)
if stat_results["ks_passes"]:
passed_tests.append("Kolmogorov-Smirnov test passed")
else:
failed_tests.append(
f"Kolmogorov-Smirnov test failed (p={stat_results['ks_pvalue']:.4f})"
)
if stat_results["mean_diff"] < self.tolerance * abs(np.mean(reference_results)):
passed_tests.append("Mean comparison passed")
else:
failed_tests.append(f"Mean difference too large: {stat_results['mean_diff']:.6f}")
# Calculate accuracy score
accuracy_score = 1.0 - min(relative_error, 1.0)
return ValidationResult(
accuracy_score=accuracy_score,
mean_error=mean_error,
max_error=max_error,
relative_error=relative_error,
ks_statistic=stat_results["ks_statistic"],
ks_pvalue=stat_results["ks_pvalue"],
passed_tests=passed_tests,
failed_tests=failed_tests,
)
def validate_growth_rates(
self, optimized_func: Callable, test_cases: Optional[List[Tuple]] = None
) -> ValidationResult:
"""Validate growth rate calculations.
Args:
optimized_func: Optimized growth rate function.
test_cases: List of (final, initial, years) test cases.
Returns:
ValidationResult for growth rate calculations.
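Example (illustrative; ``fast_growth`` is a hypothetical optimized function that happens to match the reference exactly):
>>> validator = AccuracyValidator()
>>> def fast_growth(final, initial, years):
...     return np.log(final / initial) / years
>>> float(validator.validate_growth_rates(fast_growth).accuracy_score)
1.0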
"""
if test_cases is None:
# Generate default test cases
rng = np.random.default_rng(42)
test_cases = [(rng.uniform(5e6, 20e6), 10e6, 10) for _ in range(1000)]
optimized_results = []
reference_results = []
for final, initial, years in test_cases:
opt_result = optimized_func(final, initial, years)
ref_result = self.reference.calculate_growth_rate_precise(final, initial, years)
optimized_results.append(opt_result)
reference_results.append(ref_result)
return self.compare_implementations(
np.array(optimized_results), np.array(reference_results), "Growth Rate Calculation"
)
def validate_insurance_calculations(
self, optimized_func: Callable, test_cases: Optional[List[Tuple]] = None
) -> ValidationResult:
"""Validate insurance calculations.
Args:
optimized_func: Optimized insurance function.
test_cases: List of (loss, attachment, limit) test cases.
Returns:
ValidationResult for insurance calculations.
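Example (illustrative; ``fast_layer`` is a hypothetical optimized function equivalent to the reference):
>>> validator = AccuracyValidator()
>>> def fast_layer(loss, attachment, limit):
...     recovery = min(max(loss - attachment, 0.0), limit)
...     return loss - recovery, recovery
>>> float(validator.validate_insurance_calculations(fast_layer).accuracy_score)
1.0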
"""
if test_cases is None:
# Generate test cases
rng = np.random.default_rng(42)
test_cases = [(rng.exponential(100000), 50000, 500000) for _ in range(1000)]
optimized_retained = []
reference_retained = []
for loss, attachment, limit in test_cases:
opt_retained, _ = optimized_func(loss, attachment, limit)
ref_retained, _ = self.reference.apply_insurance_precise(loss, attachment, limit)
optimized_retained.append(opt_retained)
reference_retained.append(ref_retained)
return self.compare_implementations(
np.array(optimized_retained), np.array(reference_retained), "Insurance Calculation"
)
def validate_risk_metrics(
self,
optimized_var: Callable,
optimized_tvar: Callable,
test_data: Optional[np.ndarray] = None,
) -> ValidationResult:
"""Validate risk metric calculations.
Args:
optimized_var: Optimized VaR function.
optimized_tvar: Optimized TVaR function.
test_data: Test loss data.
Returns:
ValidationResult for risk metrics.
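Example (illustrative sanity check; the reference functions stand in for the optimized callables):
>>> validator = AccuracyValidator()
>>> result = validator.validate_risk_metrics(
...     ReferenceImplementations.calculate_var_precise,
...     ReferenceImplementations.calculate_tvar_precise,
... )
>>> float(result.accuracy_score)
1.0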
"""
if test_data is None:
rng = np.random.default_rng(42)
test_data = rng.lognormal(12, 1.5, 10000)
confidence_levels = [0.9, 0.95, 0.99]
passed_tests = []
failed_tests = []
total_error = 0
test_count = 0
for confidence in confidence_levels:
# VaR validation
opt_var = optimized_var(test_data, confidence)
ref_var = self.reference.calculate_var_precise(test_data, confidence)
var_error = abs(opt_var - ref_var) / (abs(ref_var) + 1e-10)
if var_error < self.tolerance:
passed_tests.append(f"VaR@{confidence:.0%} validation passed")
else:
failed_tests.append(f"VaR@{confidence:.0%} error: {var_error:.4f}")
total_error += var_error
test_count += 1
# TVaR validation
opt_tvar = optimized_tvar(test_data, confidence)
ref_tvar = self.reference.calculate_tvar_precise(test_data, confidence)
tvar_error = abs(opt_tvar - ref_tvar) / (abs(ref_tvar) + 1e-10)
if tvar_error < self.tolerance:
passed_tests.append(f"TVaR@{confidence:.0%} validation passed")
else:
failed_tests.append(f"TVaR@{confidence:.0%} error: {tvar_error:.4f}")
total_error += tvar_error
test_count += 1
avg_error = total_error / test_count
accuracy_score = 1.0 - min(avg_error, 1.0)
return ValidationResult(
accuracy_score=accuracy_score,
relative_error=avg_error,
passed_tests=passed_tests,
failed_tests=failed_tests,
)
def run_full_validation(self) -> ValidationResult:
"""Run comprehensive validation suite.
Returns:
Complete ValidationResult.
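Example (illustrative; all edge-case and boundary tests are expected to pass):
>>> result = AccuracyValidator().run_full_validation()
>>> float(result.accuracy_score), len(result.failed_tests)
(1.0, 0)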
"""
passed_tests = []
failed_tests = []
# Test edge cases
edge_results = self.edge_tester.test_extreme_values()
for test_name, passed in edge_results.items():
if passed:
passed_tests.append(f"Edge case: {test_name}")
else:
failed_tests.append(f"Edge case: {test_name}")
# Test boundary conditions
boundary_results = self.edge_tester.test_boundary_conditions()
for test_name, passed in boundary_results.items():
if passed:
passed_tests.append(f"Boundary: {test_name}")
else:
failed_tests.append(f"Boundary: {test_name}")
# Calculate overall accuracy
total_tests = len(passed_tests) + len(failed_tests)
accuracy_score = len(passed_tests) / total_tests if total_tests > 0 else 0
return ValidationResult(
accuracy_score=accuracy_score,
passed_tests=passed_tests,
failed_tests=failed_tests,
edge_cases={**edge_results, **boundary_results},
)
def generate_validation_report(self, results: List[ValidationResult]) -> str:
"""Generate comprehensive validation report.
Args:
results: List of validation results.
Returns:
Formatted validation report.
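Example (illustrative; only the report header is shown):
>>> validator = AccuracyValidator()
>>> report = validator.generate_validation_report([validator.run_full_validation()])
>>> report.splitlines()[0]
'ACCURACY VALIDATION REPORT'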
"""
report = "ACCURACY VALIDATION REPORT\n" + "=" * 60 + "\n\n"
for i, result in enumerate(results, 1):
report += f"Test #{i}\n" + "-" * 30 + "\n"
report += result.summary() + "\n\n"
# Overall summary
avg_accuracy = np.mean([r.accuracy_score for r in results])
total_passed = sum(len(r.passed_tests) for r in results)
total_failed = sum(len(r.failed_tests) for r in results)
report += "OVERALL SUMMARY\n" + "=" * 60 + "\n"
report += f"Average Accuracy Score: {avg_accuracy:.4f}\n"
report += f"Total Tests Passed: {total_passed}\n"
report += f"Total Tests Failed: {total_failed}\n"
report += f"Success Rate: {total_passed/(total_passed+total_failed)*100:.1f}%\n"
if avg_accuracy >= 0.99 and total_failed == 0:
report += "\n✓ VALIDATION PASSED: All accuracy requirements met\n"
else:
report += "\n✗ VALIDATION FAILED: Accuracy requirements not met\n"
return report
if __name__ == "__main__":
# Example usage
rng = np.random.default_rng(42)
# Create validator
validator = AccuracyValidator(tolerance=0.01)
# Generate test data
optimized = rng.normal(0.08, 0.02, 10000)
reference = optimized + rng.normal(0, 0.0001, 10000) # Small perturbation
# Run validation
result = validator.compare_implementations(optimized, reference)
print(result.summary())
# Run full validation suite
full_result = validator.run_full_validation()
print("\n" + full_result.summary())
# Generate report
report = validator.generate_validation_report([result, full_result])
print("\n" + report)