# Source code for ergodic_insurance.reporting.formatters

"""Formatting utilities for table generation and report creation.

This module provides comprehensive formatting functions for numbers, currency,
percentages, and color coding for tables in various output formats.

Google-style docstrings are used throughout for consistency.
"""

from decimal import Decimal
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd



# [docs]
class NumberFormatter:
    """Format numbers for display in tables and reports.

    This class provides methods to format various numeric types including
    currency, percentages, and scientific notation with consistent precision
    and configurable separators.

    Attributes:
        currency_symbol: Symbol to use for currency formatting.
        decimal_places: Default number of decimal places.
        thousands_separator: Character for thousands separation.
        decimal_separator: Character for decimal separation.
    """

    # (divisor, suffix) pairs, checked largest-first so the biggest unit wins.
    _ABBREVIATIONS = (
        (1_000_000_000, "B"),
        (1_000_000, "M"),
        (1_000, "K"),
    )

    def __init__(
        self,
        currency_symbol: str = "$",
        decimal_places: int = 2,
        thousands_separator: str = ",",
        decimal_separator: str = ".",
    ):
        """Initialize NumberFormatter.

        Args:
            currency_symbol: Symbol for currency (default "$").
            decimal_places: Default decimal precision (default 2).
            thousands_separator: Thousands separator (default ",").
            decimal_separator: Decimal separator (default ".").
        """
        self.currency_symbol = currency_symbol
        self.decimal_places = decimal_places
        self.thousands_separator = thousands_separator
        self.decimal_separator = decimal_separator

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when ``value`` should be rendered as the "-" placeholder.

        Args:
            value: Candidate scalar.

        Returns:
            True for ``None`` and for any scalar that ``pd.isna`` reports as
            missing (this covers NumPy float NaNs of every width and
            ``pd.NA``, not only built-in ``float`` NaN).
        """
        if value is None:
            return True
        flag = pd.isna(value)
        # pd.isna returns an array for array-likes; those are not scalars and
        # are deliberately not treated as "missing" here.
        return bool(flag) if np.ndim(flag) == 0 else False

    def _apply_separators(self, formatted: str) -> str:
        """Swap the default "," / "." for the configured separators.

        Both characters are translated in a single pass. Two chained
        ``str.replace`` calls would corrupt the result when the separators
        are swapped (thousands "." and decimal ","), because the second
        replacement would also rewrite the output of the first.

        Args:
            formatted: Number rendered with "," grouping and "." decimals.

        Returns:
            The same number using the configured separators.
        """
        if self.thousands_separator == "," and self.decimal_separator == ".":
            return formatted
        mapping = str.maketrans(
            {",": self.thousands_separator, ".": self.decimal_separator}
        )
        return formatted.translate(mapping)

    def _abbreviate(self, value: Any, decimals: int) -> Optional[str]:
        """Render ``value`` with a K/M/B suffix when it is large enough.

        Args:
            value: Numeric value to abbreviate.
            decimals: Number of decimal places for the scaled value.

        Returns:
            The abbreviated string (without currency symbol), or ``None``
            when the magnitude is below 1,000.
        """
        # Float is used only to pick the unit; the division below keeps the
        # caller's numeric type (e.g. Decimal).
        magnitude = abs(float(value))
        for divisor, suffix in self._ABBREVIATIONS:
            if magnitude >= divisor:
                return f"{value / divisor:.{decimals}f}{suffix}"
        return None

    def format_currency(
        self,
        value: Union[float, int, Decimal],
        decimals: Optional[int] = None,
        abbreviate: bool = False,
    ) -> str:
        """Format a number as currency.

        Args:
            value: Numeric value to format.
            decimals: Number of decimal places (uses default if None).
            abbreviate: Whether to abbreviate large numbers (e.g., $1.5M).
                Abbreviated output always uses "." as the decimal mark.

        Returns:
            Formatted currency string, or "-" for missing values.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_currency(1234567.89)
            '$1,234,567.89'
            >>> formatter.format_currency(1234567.89, abbreviate=True)
            '$1.23M'
        """
        if self._is_missing(value):
            return "-"

        places = self.decimal_places if decimals is None else decimals

        if abbreviate:
            short = self._abbreviate(value, places)
            if short is not None:
                return f"{self.currency_symbol}{short}"

        grouped = self._apply_separators(f"{value:,.{places}f}")
        return f"{self.currency_symbol}{grouped}"

    def format_percentage(
        self,
        value: Union[float, int],
        decimals: Optional[int] = None,
        multiply_by_100: bool = True,
    ) -> str:
        """Format a number as percentage.

        Args:
            value: Numeric value to format.
            decimals: Number of decimal places (default 2).
            multiply_by_100: Whether to multiply by 100 (default True).

        Returns:
            Formatted percentage string, or "-" for missing values.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_percentage(0.1234)
            '12.34%'
            >>> formatter.format_percentage(12.34, multiply_by_100=False)
            '12.34%'
        """
        if self._is_missing(value):
            return "-"

        places = 2 if decimals is None else decimals
        scaled = value * 100 if multiply_by_100 else value
        return f"{scaled:.{places}f}%"

    def format_number(
        self,
        value: Union[float, int],
        decimals: Optional[int] = None,
        scientific: bool = False,
        abbreviate: bool = False,
    ) -> str:
        """Format a general number.

        Args:
            value: Numeric value to format.
            decimals: Number of decimal places (uses default if None).
            scientific: Use scientific notation when the magnitude is at
                least 1e6 or is non-zero and below 1e-3.
            abbreviate: Abbreviate large numbers (K, M, B).

        Returns:
            Formatted number string, or "-" for missing values.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_number(1234567.89)
            '1,234,567.89'
            >>> formatter.format_number(0.00001234, scientific=True)
            '1.23e-05'
        """
        if self._is_missing(value):
            return "-"

        places = self.decimal_places if decimals is None else decimals

        # Scientific notation takes precedence over abbreviation.
        magnitude = abs(value)
        if scientific and (magnitude >= 1e6 or 0 < magnitude < 1e-3):
            return f"{value:.{places}e}"

        if abbreviate:
            short = self._abbreviate(value, places)
            if short is not None:
                return short

        return self._apply_separators(f"{value:,.{places}f}")

    def format_ratio(self, value: Union[float, int], decimals: int = 2) -> str:
        """Format a ratio value.

        Args:
            value: Ratio value to format.
            decimals: Number of decimal places.

        Returns:
            Formatted ratio string with an "x" suffix, or "-" for missing
            values.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_ratio(1.5)
            '1.50x'
        """
        if self._is_missing(value):
            return "-"

        return f"{value:.{decimals}f}x"





# [docs]
class ColorCoder:
    """Apply color coding to values for visual indicators.

    This class provides methods for traffic light coloring, heatmaps,
    and threshold-based coloring for different output formats.

    Attributes:
        output_format: Target output format (html, latex, terminal, none).
        color_scheme: Mapping from traffic-light key to color.
    """

    # Default color schemes
    TRAFFIC_LIGHT = {
        "good": "#28a745",  # Green
        "warning": "#ffc107",  # Yellow/Amber
        "bad": "#dc3545",  # Red
    }

    HEATMAP_COLORS = {
        "low": "#e3f2fd",  # Light blue
        "medium_low": "#90caf9",  # Medium blue
        "medium": "#42a5f5",  # Blue
        "medium_high": "#ffb74d",  # Orange
        "high": "#ef5350",  # Red
    }

    # Exclusive upper bound of the normalized value for each heatmap bucket;
    # anything at or above the last bound falls into "high".
    _HEATMAP_BUCKETS = (
        (0.2, "low"),
        (0.4, "medium_low"),
        (0.6, "medium"),
        (0.8, "medium_high"),
    )

    # (scheme key, default hex, symbol) used for the terminal format, in the
    # order they are tested.
    _TERMINAL_MARKERS = (
        ("good", "#28a745", "✓"),
        ("warning", "#ffc107", "⚠"),
        ("bad", "#dc3545", "✗"),
    )

    def __init__(
        self,
        output_format: Literal["html", "latex", "terminal", "none"] = "none",
        color_scheme: Optional[Dict[str, str]] = None,
    ):
        """Initialize ColorCoder.

        Args:
            output_format: Target output format.
            color_scheme: Custom color scheme (uses defaults if None or empty).
        """
        self.output_format = output_format
        # Copy the default so mutating one instance's scheme cannot alter the
        # class-level default shared by every other instance.
        self.color_scheme = color_scheme or dict(self.TRAFFIC_LIGHT)

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when ``value`` should be rendered as the "-" placeholder.

        Args:
            value: Candidate scalar.

        Returns:
            True for ``None`` and for any scalar that ``pd.isna`` reports as
            missing (NumPy NaNs of every width, ``pd.NA``).
        """
        if value is None:
            return True
        flag = pd.isna(value)
        return bool(flag) if np.ndim(flag) == 0 else False

    @staticmethod
    def _within(value: Any, lower: Optional[float], upper: Optional[float]) -> bool:
        """Check whether ``value`` lies in the inclusive range [lower, upper].

        A bound of ``None`` means "unbounded on that side". A range with both
        bounds ``None`` never matches.
        """
        if lower is None and upper is None:
            return False
        if lower is not None and value < lower:
            return False
        if upper is not None and value > upper:
            return False
        return True

    def traffic_light(
        self,
        value: Union[float, int],
        thresholds: Dict[str, Tuple[Optional[float], Optional[float]]],
        text: Optional[str] = None,
    ) -> str:
        """Apply traffic light coloring based on thresholds.

        Ranges are inclusive on both ends and are tested in the dict's
        iteration order; the first matching key wins. If no range matches,
        the 'bad' color is used.

        Args:
            value: Numeric value to evaluate.
            thresholds: Dict with keys 'good', 'warning', 'bad' and (min, max) tuples.
            text: Text to display (uses value if None).

        Returns:
            Formatted string with appropriate coloring, or "-" for missing
            values.

        Examples:
            >>> coder = ColorCoder(output_format="html")
            >>> thresholds = {
            ...     'good': (0.15, None),
            ...     'warning': (0.10, 0.15),
            ...     'bad': (None, 0.10)
            ... }
            >>> coder.traffic_light(0.18, thresholds)
            '<span style="color: #28a745;">0.18</span>'
        """
        if self._is_missing(value):
            return "-"

        display_text = str(value) if text is None else text

        color_key = "bad"  # Fallback when no range matches
        for key, (lower, upper) in thresholds.items():
            if self._within(value, lower, upper):
                color_key = key
                break

        # Keys absent from the scheme render in black.
        return self._apply_color(display_text, self.color_scheme.get(color_key, "#000000"))

    def heatmap(
        self,
        value: Union[float, int],
        min_val: float,
        max_val: float,
        text: Optional[str] = None,
    ) -> str:
        """Apply heatmap coloring based on value range.

        The value is normalized to [0, 1] (clamped) and mapped onto five
        equal-width buckets of ``HEATMAP_COLORS``.

        Args:
            value: Numeric value to color.
            min_val: Minimum value in range.
            max_val: Maximum value in range.
            text: Text to display (uses value if None).

        Returns:
            Formatted string with heatmap coloring, or "-" for missing values.

        Examples:
            >>> coder = ColorCoder(output_format="html")
            >>> coder.heatmap(50, 0, 100)
            '<span style="background-color: #42a5f5; padding: 2px 4px;">50</span>'
        """
        if self._is_missing(value):
            return "-"

        display_text = str(value) if text is None else text

        if max_val == min_val:
            # Degenerate range: avoid dividing by zero and use the middle bucket.
            normalized = 0.5
        else:
            normalized = (value - min_val) / (max_val - min_val)
            normalized = max(0, min(1, normalized))  # Clamp to [0, 1]

        bucket = "high"
        for upper_bound, name in self._HEATMAP_BUCKETS:
            if normalized < upper_bound:
                bucket = name
                break

        return self._apply_color(
            display_text, self.HEATMAP_COLORS[bucket], is_background=True
        )

    def threshold_color(
        self,
        value: Union[float, int],
        threshold: float,
        above_color: str = "#28a745",
        below_color: str = "#dc3545",
        text: Optional[str] = None,
    ) -> str:
        """Apply binary coloring based on threshold.

        Args:
            value: Numeric value to evaluate.
            threshold: Threshold value.
            above_color: Color for values at or above the threshold.
            below_color: Color for values below the threshold.
            text: Text to display (uses value if None).

        Returns:
            Formatted string with threshold-based coloring, or "-" for
            missing values.
        """
        if self._is_missing(value):
            return "-"

        display_text = str(value) if text is None else text
        chosen = above_color if value >= threshold else below_color
        return self._apply_color(display_text, chosen)

    @staticmethod
    def _latex_color_spec(color: Any) -> str:
        """Build the color argument(s) for ``\\textcolor`` / ``\\colorbox``.

        xcolor does not accept ``#rrggbb`` as a color name; hex values have to
        be passed through the ``HTML`` color model without the leading ``#``.
        Named colors are passed through unchanged.

        Args:
            color: Hex color ("#rgb" or "#rrggbb") or a LaTeX color name.

        Returns:
            Either ``[HTML]{RRGGBB}`` or ``{name}``.
        """
        color_text = str(color)
        if not color_text.startswith("#"):
            return f"{{{color_text}}}"
        digits = color_text[1:]
        if len(digits) == 3:
            # Expand CSS shorthand (#abc -> aabbcc); HTML model needs 6 digits.
            digits = "".join(ch * 2 for ch in digits)
        return f"[HTML]{{{digits.upper()}}}"

    def _apply_color(
        self,
        text: str,
        color: str,
        is_background: bool = False,
    ) -> str:
        """Apply color formatting based on output format.

        Args:
            text: Text to format.
            color: Color code (hex or name).
            is_background: Whether to apply as background color.

        Returns:
            Formatted text string. For the "none" format (or any unknown
            format) the text is returned unchanged.
        """
        if self.output_format == "html":
            if is_background:
                return f'<span style="background-color: {color}; padding: 2px 4px;">{text}</span>'
            return f'<span style="color: {color};">{text}</span>'

        if self.output_format == "latex":
            # Requires the xcolor package in the document preamble.
            command = "\\colorbox" if is_background else "\\textcolor"
            return f"{command}{self._latex_color_spec(color)}{{{text}}}"

        if self.output_format == "terminal":
            # Terminals get a status symbol instead of a color; colors that
            # are not one of the traffic-light values get no symbol.
            color_text = str(color)
            for name, hex_code, marker in self._TERMINAL_MARKERS:
                if name in color_text or hex_code in color_text:
                    return f"{marker} {text}"
            return text

        return text




# [docs]
class TableFormatter:
    """High-level table formatting utilities.

    This class combines number formatting and color coding to provide
    comprehensive table formatting capabilities.

    Attributes:
        number_formatter: NumberFormatter instance.
        color_coder: ColorCoder instance.
        output_format: Target output format passed at construction.
    """

    # Aggregations accepted by add_totals_row; each is a pandas Series method.
    _AGGREGATIONS = ("sum", "mean", "median")

    def __init__(
        self,
        output_format: Literal["html", "latex", "terminal", "none"] = "none",
        currency_symbol: str = "$",
        decimal_places: int = 2,
    ):
        """Initialize TableFormatter.

        Args:
            output_format: Target output format.
            currency_symbol: Currency symbol to use.
            decimal_places: Default decimal precision.
        """
        self.number_formatter = NumberFormatter(
            currency_symbol=currency_symbol,
            decimal_places=decimal_places,
        )
        self.color_coder = ColorCoder(output_format=output_format)
        self.output_format = output_format

    def _cell_formatter(self, fmt: Dict[str, Any], column: pd.Series) -> Any:
        """Build the per-cell formatting callable for one column spec.

        Args:
            fmt: Format specification; ``fmt["type"]`` selects the formatter
                (defaults to "number"), the remaining keys are its options.
            column: The column being formatted; only consulted to derive the
                heatmap range when "min"/"max" are not given.

        Returns:
            A one-argument callable mapping a cell value to its formatted
            string, or ``None`` when the type is not recognized.
        """
        kind = fmt.get("type", "number")
        numbers = self.number_formatter
        colors = self.color_coder

        if kind == "currency":
            decimals = fmt.get("decimals")
            abbreviate = fmt.get("abbreviate", False)
            return lambda cell: numbers.format_currency(
                cell, decimals=decimals, abbreviate=abbreviate
            )
        if kind == "percentage":
            decimals = fmt.get("decimals")
            multiply = fmt.get("multiply_by_100", True)
            return lambda cell: numbers.format_percentage(
                cell, decimals=decimals, multiply_by_100=multiply
            )
        if kind == "number":
            decimals = fmt.get("decimals")
            scientific = fmt.get("scientific", False)
            abbreviate = fmt.get("abbreviate", False)
            return lambda cell: numbers.format_number(
                cell, decimals=decimals, scientific=scientific, abbreviate=abbreviate
            )
        if kind == "ratio":
            decimals = fmt.get("decimals", 2)
            return lambda cell: numbers.format_ratio(cell, decimals=decimals)
        if kind == "traffic_light":
            thresholds = fmt.get("thresholds", {})
            return lambda cell: colors.traffic_light(cell, thresholds)
        if kind == "heatmap":
            # Only scan the column when the caller did not pin the range.
            low = fmt["min"] if "min" in fmt else column.min()
            high = fmt["max"] if "max" in fmt else column.max()
            return lambda cell: colors.heatmap(cell, low, high)
        return None

    def format_dataframe(
        self,
        df: pd.DataFrame,
        column_formats: Optional[Dict[str, Dict[str, Any]]] = None,
        row_colors: Optional[Dict[int, str]] = None,
        alternating_rows: bool = False,
    ) -> pd.DataFrame:
        """Apply formatting to entire DataFrame.

        The input is not modified; a formatted copy is returned. Columns
        named in ``column_formats`` that are absent from ``df``, and specs
        with an unrecognized "type", are skipped.

        Args:
            df: Input DataFrame.
            column_formats: Format specifications per column.
            row_colors: Colors for specific rows. Accepted for interface
                compatibility; not applied by this method.
            alternating_rows: Whether to use alternating row colors. Accepted
                for interface compatibility; not applied by this method.

        Returns:
            Formatted DataFrame.

        Examples:
            >>> formatter = TableFormatter()
            >>> formats = {
            ...     'Revenue': {'type': 'currency', 'abbreviate': True},
            ...     'Growth': {'type': 'percentage'},
            ...     'Risk': {'type': 'traffic_light', 'thresholds': {...}}
            ... }
            >>> formatted_df = formatter.format_dataframe(df, formats)
        """
        formatted_df = df.copy()

        for col, fmt in (column_formats or {}).items():
            if col not in formatted_df.columns:
                continue
            cell_formatter = self._cell_formatter(fmt, formatted_df[col])
            if cell_formatter is not None:
                formatted_df[col] = formatted_df[col].apply(cell_formatter)

        return formatted_df

    def add_totals_row(
        self,
        df: pd.DataFrame,
        columns: Optional[List[str]] = None,
        label: str = "Total",
        operation: Literal["sum", "mean", "median"] = "sum",
    ) -> pd.DataFrame:
        """Add a totals row to DataFrame.

        The label is written into the first column that is not aggregated,
        so it is not lost when the leading column is numeric. All other
        non-aggregated columns get an empty string.

        Args:
            df: Input DataFrame.
            columns: Columns to total (None for all numeric).
            label: Label for totals row.
            operation: Aggregation operation.

        Returns:
            New DataFrame with the totals row appended and the index reset.

        Raises:
            ValueError: If ``operation`` is not "sum", "mean" or "median".
        """
        if operation not in self._AGGREGATIONS:
            raise ValueError(f"Unsupported operation: {operation}")

        if columns is None:
            columns = df.select_dtypes(include=[np.number]).columns.tolist()

        totals_row = {}
        label_placed = False
        for col in df.columns:
            if col in columns:
                totals_row[col] = getattr(df[col], operation)()
            elif not label_placed:
                totals_row[col] = label
                label_placed = True
            else:
                totals_row[col] = ""

        return pd.concat(
            [df.copy(), pd.DataFrame([totals_row])],
            ignore_index=True,
        )

    def add_footnotes(
        self,
        table_str: str,
        footnotes: List[str],
        output_format: Optional[str] = None,
    ) -> str:
        """Add footnotes to a table string.

        Args:
            table_str: Table string.
            footnotes: List of footnote texts.
            output_format: Output format (uses instance format if None).

        Returns:
            Table with footnotes appended after a newline; the table string
            unchanged when there are no footnotes.
        """
        if not footnotes:
            return table_str

        target = output_format or self.output_format

        if target == "html":
            items = "".join(
                f"<sup>{i}</sup> {note}<br/>" for i, note in enumerate(footnotes, 1)
            )
            footnote_str = f"<div class='footnotes'><small>{items}</small></div>"
        elif target == "latex":
            footnote_str = "".join(f"\\footnote{{{note}}}" for note in footnotes)
        else:
            # Plain text: a rule followed by one numbered line per footnote.
            rule = "\n" + "-" * 40 + "\n"
            footnote_str = rule + "".join(
                f"[{i}] {note}\n" for i, note in enumerate(footnotes, 1)
            )

        return table_str + "\n" + footnote_str





# [docs]
def format_for_export(
    df: pd.DataFrame,
    export_format: Literal["csv", "excel", "latex", "html", "markdown"],
    include_index: bool = False,
    **kwargs,
) -> Union[str, None]:
    """Format DataFrame for export to various formats.

    Args:
        df: DataFrame to export.
        export_format: Export format.
        include_index: Whether to include row index.
        **kwargs: Additional format-specific arguments, forwarded to the
            matching pandas writer. Keys consumed here rather than forwarded:
            ``file_path`` (excel), ``caption`` and ``label`` (latex),
            ``table_id`` and ``classes`` (html).

    Returns:
        Formatted string, or None for the file-based excel export (nothing
        is written when no ``file_path`` is given).

    Raises:
        ValueError: If ``export_format`` is not one of the supported formats.

    Examples:
        >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        >>> csv_str = format_for_export(df, 'csv')
        >>> latex_str = format_for_export(df, 'latex', caption='My Table')
    """
    if export_format == "csv":
        return str(df.to_csv(index=include_index, **kwargs))

    if export_format == "excel":
        # Pop (not get) so file_path is not forwarded a second time as an
        # unknown keyword, which makes DataFrame.to_excel raise TypeError.
        file_path = kwargs.pop("file_path", None)
        if file_path:
            df.to_excel(file_path, index=include_index, **kwargs)
        return None

    if export_format == "latex":
        caption = kwargs.pop("caption", None)
        label = kwargs.pop("label", None)
        tabular = df.to_latex(index=include_index, **kwargs)

        if not (caption or label):
            return str(tabular)

        # Wrap the tabular in a floating table environment.
        parts = ["\\begin{table}[htbp]\n\\centering\n"]
        if caption:
            parts.append(f"\\caption{{{caption}}}\n")
        if label:
            parts.append(f"\\label{{{label}}}\n")
        parts.append(tabular)
        parts.append("\\end{table}")
        return "".join(parts)

    if export_format == "html":
        # Let pandas emit id/class itself: it merges extra classes into its
        # own class="dataframe" attribute instead of producing a second,
        # duplicate class attribute on the <table> tag.
        table_id = kwargs.pop("table_id", None) or None
        classes = kwargs.pop("classes", None) or None
        return str(
            df.to_html(
                index=include_index,
                table_id=table_id,
                classes=classes,
                **kwargs,
            )
        )

    if export_format == "markdown":
        return str(df.to_markdown(index=include_index, **kwargs))

    raise ValueError(f"Unsupported format: {export_format}")