# Source code for ergodic_insurance.reporting.validator

"""Report validation and quality control utilities.

This module provides validation functions to ensure report completeness,
accuracy, and quality before generation.
"""

import logging
from pathlib import Path
import re
from typing import Any, Dict, Generator, List, Tuple

import numpy as np

from .config import ReportConfig, SectionConfig

logger = logging.getLogger(__name__)


class ReportValidator:
    """Validate report configuration and content.

    This class provides comprehensive validation for report configurations,
    ensuring all references are valid, data is complete, and quality
    standards are met.

    Attributes:
        config: Report configuration to validate.
        errors: List of validation errors.
        warnings: List of validation warnings.
        info: List of informational notes collected during validation.
    """

    # In-text reference patterns, e.g. "Figure: name" or "Table name".
    # Compiled once here instead of on every section visit.
    _FIGURE_REF = re.compile(r"Figure[:\s]+(\w+)")
    _TABLE_REF = re.compile(r"Table[:\s]+(\w+)")

    def __init__(self, config: "ReportConfig"):
        """Initialize ReportValidator.

        Args:
            config: Report configuration to validate.
        """
        self.config = config
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.info: List[str] = []

    def validate(self) -> Tuple[bool, List[str], List[str]]:
        """Run complete validation suite.

        Resets the collected messages, runs every check, and logs each
        collected message at the matching log level.

        Returns:
            Tuple of (is_valid, errors, warnings). ``is_valid`` is True only
            when no errors were collected.
        """
        self.errors = []
        self.warnings = []
        self.info = []

        # Run all validation checks
        self._validate_structure()
        self._validate_references()
        self._validate_data_sources()
        self._validate_formatting()
        self._validate_completeness()
        self._validate_quality()

        is_valid = not self.errors

        # Log results (lazy %-formatting: the message is only built if emitted)
        for error in self.errors:
            logger.error("Validation error: %s", error)
        for warning in self.warnings:
            logger.warning("Validation warning: %s", warning)
        for note in self.info:
            logger.info("Validation info: %s", note)

        return is_valid, self.errors, self.warnings

    def _validate_structure(self):
        """Validate report structure and hierarchy."""
        # Check metadata
        if not self.config.metadata.title:
            self.errors.append("Report title is required")
        if not self.config.metadata.authors:
            self.warnings.append("No authors specified")

        # Check sections
        if not self.config.sections:
            self.errors.append("Report must have at least one section")

        # Validate section hierarchy
        self._check_section_hierarchy(self.config.sections)

        # Check output formats
        if not self.config.output_formats:
            self.errors.append("At least one output format must be specified")

        # Validate directories exist
        if not self.config.output_dir.exists():
            self.warnings.append(f"Output directory does not exist: {self.config.output_dir}")
        if not self.config.cache_dir.exists():
            self.warnings.append(f"Cache directory does not exist: {self.config.cache_dir}")

    def _check_section_hierarchy(self, sections: List["SectionConfig"], parent_level: int = 0):
        """Check section hierarchy is valid.

        Args:
            sections: List of sections to check.
            parent_level: Parent section level.
        """
        for section in sections:
            # Check level progression
            if section.level <= parent_level:
                self.warnings.append(
                    f"Section '{section.title}' level {section.level} "
                    f"should be greater than parent level {parent_level}"
                )

            # Check title
            if not section.title:
                self.errors.append("Section title cannot be empty")

            # Recursively check subsections
            if section.subsections:
                self._check_section_hierarchy(section.subsections, section.level)

    def _validate_references(self):
        """Validate all figure and table references.

        Duplicate figure/table names are errors, references to undefined
        names are warnings, and definitions that are never referenced are
        informational notes. Messages derived from sets are emitted in sorted
        order so the output is the same on every run.
        """
        defined_figures = set()
        defined_tables = set()
        referenced_figures = set()
        referenced_tables = set()

        for section in self._iter_sections(self.config.sections):
            # Collect definitions, flagging duplicates as they appear
            for fig in section.figures:
                if fig.name in defined_figures:
                    self.errors.append(f"Duplicate figure name: {fig.name}")
                defined_figures.add(fig.name)

            for table in section.tables:
                if table.name in defined_tables:
                    self.errors.append(f"Duplicate table name: {table.name}")
                defined_tables.add(table.name)

            # Collect references found in the section text
            if section.content:
                referenced_figures.update(self._FIGURE_REF.findall(section.content))
                referenced_tables.update(self._TABLE_REF.findall(section.content))

        # Check for undefined references
        for name in sorted(referenced_figures - defined_figures, key=str):
            self.warnings.append(f"Referenced but undefined figure: {name}")
        for name in sorted(referenced_tables - defined_tables, key=str):
            self.warnings.append(f"Referenced but undefined table: {name}")

        # Check for unused definitions
        for name in sorted(defined_figures - referenced_figures, key=str):
            self.info.append(f"Defined but unreferenced figure: {name}")
        for name in sorted(defined_tables - referenced_tables, key=str):
            self.info.append(f"Defined but unreferenced table: {name}")

    def _iter_sections(
        self, sections: List["SectionConfig"]
    ) -> Generator["SectionConfig", None, None]:
        """Iterate through all sections including subsections.

        Args:
            sections: List of sections.

        Yields:
            Each section, followed by its subsections (depth-first).
        """
        for section in sections:
            yield section
            if section.subsections:
                yield from self._iter_sections(section.subsections)

    @staticmethod
    def _is_missing_source(source: Any) -> bool:
        """Return True if a path-like source is neither on disk nor generated.

        Only ``str`` and ``Path`` sources are checked. A source whose string
        form starts with ``"generate_"`` is never reported as missing.
        """
        if not isinstance(source, (str, Path)):
            return False
        if Path(source).exists():
            return False
        return not str(source).startswith("generate_")

    def _validate_data_sources(self):
        """Validate data sources for figures and tables."""
        for section in self._iter_sections(self.config.sections):
            # Check figure sources
            for fig in section.figures:
                if self._is_missing_source(fig.source):
                    self.warnings.append(
                        f"Figure source not found: {fig.source} (figure: {fig.name})"
                    )

            # Check table data sources
            for table in section.tables:
                if self._is_missing_source(table.data_source):
                    self.warnings.append(
                        f"Table data source not found: {table.data_source} (table: {table.name})"
                    )

    def _validate_formatting(self):
        """Validate formatting parameters."""
        # Check figure dimensions
        for section in self._iter_sections(self.config.sections):
            for fig in section.figures:
                if fig.width > 10 or fig.height > 10:
                    self.warnings.append(
                        f"Figure '{fig.name}' dimensions may be too large: "
                        f"{fig.width}x{fig.height} inches"
                    )
                if fig.dpi < 150:
                    self.warnings.append(
                        f"Figure '{fig.name}' DPI ({fig.dpi}) may be too low for print quality"
                    )

        # Check style parameters
        style = self.config.style
        if style.font_size < 8:
            self.warnings.append(f"Font size {style.font_size}pt may be too small")
        elif style.font_size > 14:
            self.warnings.append(f"Font size {style.font_size}pt may be too large")

        # Check margins
        for margin_name, margin_value in style.margins.items():
            if margin_value < 0.5:
                self.warnings.append(f"Margin '{margin_name}' ({margin_value}in) may be too small")
            elif margin_value > 2:
                self.warnings.append(f"Margin '{margin_name}' ({margin_value}in) may be too large")

    def _validate_completeness(self):
        """Check report completeness.

        Warns about top-level sections a template requires but the report
        lacks, and about sections that have no content, figures, tables, or
        subsections.
        """
        # (template name, label used in the message, required top-level titles)
        template_requirements = (
            ("executive", "Executive", {"Key Findings", "Recommendations"}),
            ("technical", "Technical", {"Methodology", "Statistical Validation"}),
        )
        for template_name, label, required_sections in template_requirements:
            # Compare with == rather than a dict lookup so the match does not
            # depend on how the template value hashes.
            if self.config.template == template_name:
                section_titles = {s.title for s in self.config.sections}
                # Sorted so warnings come out in a stable order.
                for missing_section in sorted(required_sections - section_titles):
                    self.warnings.append(f"{label} report missing section: {missing_section}")
                break

        # Check for empty sections
        for section in self._iter_sections(self.config.sections):
            if (
                not section.content
                and not section.figures
                and not section.tables
                and not section.subsections
            ):
                self.warnings.append(f"Section '{section.title}' has no content")

    def _validate_quality(self):
        """Perform quality checks on report configuration."""
        total_figures = 0
        total_tables = 0

        # Check caption quality, counting figures and tables in the same pass
        for section in self._iter_sections(self.config.sections):
            total_figures += len(section.figures)
            total_tables += len(section.tables)

            for fig in section.figures:
                if len(fig.caption) < 10:
                    self.warnings.append(
                        f"Figure '{fig.name}' caption may be too short: '{fig.caption}'"
                    )

            for table in section.tables:
                if len(table.caption) < 10:
                    self.warnings.append(
                        f"Table '{table.name}' caption may be too short: '{table.caption}'"
                    )

        # Check metadata quality
        if self.config.metadata.abstract and len(self.config.metadata.abstract) < 50:
            self.warnings.append("Abstract may be too short")

        if not self.config.metadata.keywords:
            self.warnings.append("No keywords specified for report")

        # Count total content (the section count is top-level sections only)
        self.info.append(
            f"Report contains {len(self.config.sections)} sections, "
            f"{total_figures} figures, and {total_tables} tables"
        )

        # Check balance
        if total_figures > 20:
            self.warnings.append(f"Report has many figures ({total_figures}), consider reducing")
        if total_tables > 15:
            self.warnings.append(f"Report has many tables ({total_tables}), consider reducing")
def validate_results_data(results: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Validate results data for report generation.

    Checks that the required keys are present, that ``roe`` and
    ``ruin_probability`` are numeric and within range, and that
    ``trajectories`` is a 1D or 2D NumPy array.

    Args:
        results: Results dictionary to validate.

    Returns:
        Tuple of (is_valid, error_messages).
    """
    # NumPy scalars such as np.float32 or np.int64 are not instances of the
    # built-in int/float, so they are accepted explicitly.
    numeric_types = (int, float, np.integer, np.floating)
    errors: List[str] = []

    # Check for required keys
    for key in ("roe", "ruin_probability", "trajectories"):
        if key not in results:
            errors.append(f"Missing required results key: {key}")

    # Validate data types and ranges
    if "roe" in results:
        roe = results["roe"]
        if not isinstance(roe, numeric_types):
            errors.append(f"ROE must be numeric, got {type(roe)}")
        elif not -1 <= roe <= 10:
            # NaN also lands here: every comparison with NaN is False.
            errors.append(f"ROE value {roe} seems unrealistic")

    if "ruin_probability" in results:
        ruin_prob = results["ruin_probability"]
        if not isinstance(ruin_prob, numeric_types):
            errors.append(f"Ruin probability must be numeric, got {type(ruin_prob)}")
        elif not 0 <= ruin_prob <= 1:
            errors.append(f"Ruin probability must be between 0 and 1, got {ruin_prob}")

    if "trajectories" in results:
        trajectories = results["trajectories"]
        if not isinstance(trajectories, np.ndarray):
            errors.append(f"Trajectories must be numpy array, got {type(trajectories)}")
        elif trajectories.ndim not in (1, 2):
            errors.append(f"Trajectories must be 1D or 2D array, got shape {trajectories.shape}")

    return not errors, errors
def validate_parameters(params: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Validate model parameters.

    Malformed input is reported as a validation error instead of raising:
    a ``financial`` or ``simulation`` group that is not a mapping (for
    example ``None``), or a value that cannot be compared with a number,
    each produce an error message. NaN fails the positivity checks.

    Args:
        params: Parameters dictionary to validate.

    Returns:
        Tuple of (is_valid, error_messages).
    """
    # Local import keeps this function self-contained.
    from collections.abc import Mapping

    errors: List[str] = []

    def inspectable_group(name: str):
        """Return the named group if present and a mapping, else None."""
        if name not in params:
            return None  # absence is reported by the required-groups check
        group = params[name]
        if not isinstance(group, Mapping):
            errors.append(f"Parameter group '{name}' must be a mapping, got {type(group)}")
            return None
        return group

    def check(group, key: str, label: str, is_valid, failure: str) -> None:
        """Apply ``is_valid`` to ``group[key]`` when present, recording errors."""
        if key not in group:
            return
        value = group[key]
        try:
            passed = bool(is_valid(value))
        except (TypeError, ValueError):
            # The value cannot be ordered against a number (e.g. str, None)
            # or has no single truth value (e.g. an array).
            errors.append(f"{label} must be numeric, got {type(value)}")
            return
        if not passed:
            errors.append(failure)

    # Check for required parameter groups
    for group_name in ("financial", "insurance", "simulation"):
        if group_name not in params:
            errors.append(f"Missing required parameter group: {group_name}")

    # `v > 0` (rather than `not v <= 0`) makes NaN fail the positivity checks.
    def is_positive(v):
        return v > 0

    # Validate financial parameters
    financial = inspectable_group("financial")
    if financial is not None:
        check(
            financial,
            "initial_assets",
            "Initial assets",
            is_positive,
            "Initial assets must be positive",
        )
        check(
            financial,
            "tax_rate",
            "Tax rate",
            lambda v: 0 <= v <= 1,
            "Tax rate must be between 0 and 1",
        )

    # Validate simulation parameters
    sim = inspectable_group("simulation")
    if sim is not None:
        check(sim, "years", "Simulation years", is_positive, "Simulation years must be positive")
        check(
            sim,
            "num_simulations",
            "Number of simulations",
            is_positive,
            "Number of simulations must be positive",
        )

    return not errors, errors