Source code for ergodic_insurance.reporting.formatters
"""Formatting utilities for table generation and report creation.
This module provides comprehensive formatting functions for numbers, currency,
percentages, and color coding for tables in various output formats.
Google-style docstrings are used throughout for consistency.
"""
from decimal import Decimal
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
import numpy as np
import pandas as pd
[docs]
class NumberFormatter:
    """Format numbers for display in tables and reports.

    Provides currency, percentage, plain-number and ratio formatting with a
    configurable currency symbol, default precision, and thousands/decimal
    separators. Missing values (``None`` or NaN) are rendered as ``"-"``.

    Attributes:
        currency_symbol: Symbol to use for currency formatting.
        decimal_places: Default number of decimal places.
        thousands_separator: Character for thousands separation.
        decimal_separator: Character for decimal separation.
    """

    # (divisor, suffix) pairs checked from largest to smallest when abbreviating.
    _ABBREVIATIONS = (
        (1_000_000_000, "B"),
        (1_000_000, "M"),
        (1_000, "K"),
    )

    def __init__(
        self,
        currency_symbol: str = "$",
        decimal_places: int = 2,
        thousands_separator: str = ",",
        decimal_separator: str = ".",
    ):
        """Initialize NumberFormatter.

        Args:
            currency_symbol: Symbol for currency (default "$").
            decimal_places: Default decimal precision (default 2).
            thousands_separator: Thousands separator (default ",").
            decimal_separator: Decimal separator (default ".").
        """
        self.currency_symbol = currency_symbol
        self.decimal_places = decimal_places
        self.thousands_separator = thousands_separator
        self.decimal_separator = decimal_separator

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when ``value`` is None or a NaN float/Decimal."""
        if value is None:
            return True
        if isinstance(value, Decimal):
            return value.is_nan()
        # np.floating covers NumPy scalars (e.g. float32) that are not
        # subclasses of the builtin float.
        if isinstance(value, (float, np.floating)):
            return bool(np.isnan(value))
        return False

    def _localize(self, formatted: str) -> str:
        """Swap the default ``,`` / ``.`` separators for the configured ones.

        Both characters are mapped in a single pass so that configurations
        which exchange them (thousands ".", decimal ",") are not corrupted by
        the second replacement undoing the first.
        """
        if self.thousands_separator == "," and self.decimal_separator == ".":
            return formatted
        table = str.maketrans(
            {",": self.thousands_separator, ".": self.decimal_separator}
        )
        return formatted.translate(table)

    def format_currency(
        self,
        value: Union[float, int, Decimal],
        decimals: Optional[int] = None,
        abbreviate: bool = False,
    ) -> str:
        """Format a number as currency.

        Args:
            value: Numeric value to format.
            decimals: Number of decimal places (uses default if None).
            abbreviate: Whether to abbreviate large numbers (e.g., $1.5M).

        Returns:
            Formatted currency string, or "-" for None/NaN input.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_currency(1234567.89)
            '$1,234,567.89'
            >>> formatter.format_currency(1234567.89, abbreviate=True)
            '$1.23M'
        """
        if self._is_missing(value):
            return "-"

        places = decimals if decimals is not None else self.decimal_places
        # Float is used only for the magnitude comparison; the original value
        # (possibly a Decimal) is what actually gets formatted.
        magnitude = abs(float(value))

        if abbreviate:
            for divisor, suffix in self._ABBREVIATIONS:
                if magnitude >= divisor:
                    scaled = self._localize(f"{value / divisor:.{places}f}")
                    return f"{self.currency_symbol}{scaled}{suffix}"

        body = self._localize(f"{value:,.{places}f}")
        return f"{self.currency_symbol}{body}"

    def format_percentage(
        self,
        value: Union[float, int],
        decimals: Optional[int] = None,
        multiply_by_100: bool = True,
    ) -> str:
        """Format a number as percentage.

        Args:
            value: Numeric value to format.
            decimals: Number of decimal places (default 2).
            multiply_by_100: Whether to multiply by 100 (default True).

        Returns:
            Formatted percentage string, or "-" for None/NaN input.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_percentage(0.1234)
            '12.34%'
            >>> formatter.format_percentage(12.34, multiply_by_100=False)
            '12.34%'
        """
        if self._is_missing(value):
            return "-"

        places = decimals if decimals is not None else 2
        if multiply_by_100:
            value = value * 100
        return self._localize(f"{value:.{places}f}") + "%"

    def format_number(
        self,
        value: Union[float, int],
        decimals: Optional[int] = None,
        scientific: bool = False,
        abbreviate: bool = False,
    ) -> str:
        """Format a general number.

        Scientific notation takes precedence over abbreviation and is only
        used when the magnitude is >= 1e6 or is non-zero and < 1e-3.

        Args:
            value: Numeric value to format.
            decimals: Number of decimal places (uses default if None).
            scientific: Use scientific notation for large/small numbers.
            abbreviate: Abbreviate large numbers (K, M, B).

        Returns:
            Formatted number string, or "-" for None/NaN input.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_number(1234567.89)
            '1,234,567.89'
            >>> formatter.format_number(0.00001234, scientific=True)
            '1.23e-05'
        """
        if self._is_missing(value):
            return "-"

        places = decimals if decimals is not None else self.decimal_places
        magnitude = abs(value)

        if scientific and (magnitude >= 1e6 or (magnitude < 1e-3 and value != 0)):
            return self._localize(f"{value:.{places}e}")

        if abbreviate:
            for divisor, suffix in self._ABBREVIATIONS:
                if magnitude >= divisor:
                    return self._localize(f"{value / divisor:.{places}f}") + suffix

        return self._localize(f"{value:,.{places}f}")

    def format_ratio(self, value: Union[float, int], decimals: int = 2) -> str:
        """Format a ratio value with a trailing "x".

        Args:
            value: Ratio value to format.
            decimals: Number of decimal places.

        Returns:
            Formatted ratio string, or "-" for None/NaN input.

        Examples:
            >>> formatter = NumberFormatter()
            >>> formatter.format_ratio(1.5)
            '1.50x'
        """
        if self._is_missing(value):
            return "-"
        return self._localize(f"{value:.{decimals}f}") + "x"
[docs]
class ColorCoder:
    """Apply color coding to values for visual indicators.

    Provides traffic-light, heatmap and single-threshold coloring, rendered
    for the configured output format. With ``output_format="none"`` (the
    default) the text is returned unchanged.

    Attributes:
        output_format: Target output format (html, latex, terminal, none).
        color_scheme: Mapping from traffic-light keys to colors.
    """

    # Default color schemes
    TRAFFIC_LIGHT = {
        "good": "#28a745",  # Green
        "warning": "#ffc107",  # Yellow/Amber
        "bad": "#dc3545",  # Red
    }
    HEATMAP_COLORS = {
        "low": "#e3f2fd",  # Light blue
        "medium_low": "#90caf9",  # Medium blue
        "medium": "#42a5f5",  # Blue
        "medium_high": "#ffb74d",  # Orange
        "high": "#ef5350",  # Red
    }

    # Exclusive upper bounds (on the normalized 0-1 scale) for each heatmap
    # bucket; anything at or above the last bound falls into "high".
    _HEATMAP_BUCKETS = (
        (0.2, "low"),
        (0.4, "medium_low"),
        (0.6, "medium"),
        (0.8, "medium_high"),
    )

    def __init__(
        self,
        output_format: Literal["html", "latex", "terminal", "none"] = "none",
        color_scheme: Optional[Dict[str, str]] = None,
    ):
        """Initialize ColorCoder.

        Args:
            output_format: Target output format.
            color_scheme: Custom color scheme (uses TRAFFIC_LIGHT if None or empty).
        """
        self.output_format = output_format
        self.color_scheme = color_scheme or self.TRAFFIC_LIGHT

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when ``value`` is None or a NaN float (builtin or NumPy)."""
        if value is None:
            return True
        if isinstance(value, (float, np.floating)):
            return bool(np.isnan(value))
        return False

    def traffic_light(
        self,
        value: Union[float, int],
        thresholds: Dict[str, Tuple[Optional[float], Optional[float]]],
        text: Optional[str] = None,
    ) -> str:
        """Apply traffic light coloring based on thresholds.

        Thresholds are checked in dict order and the first matching range
        wins; bounds are inclusive. A ``None`` bound means "unbounded" on
        that side; a ``(None, None)`` range never matches. If nothing
        matches, the "bad" color is used.

        Args:
            value: Numeric value to evaluate.
            thresholds: Dict with keys 'good', 'warning', 'bad' and (min, max) tuples.
            text: Text to display (uses value if None).

        Returns:
            Formatted string with appropriate coloring, or "-" for None/NaN.

        Examples:
            >>> coder = ColorCoder(output_format="html")
            >>> thresholds = {
            ...     'good': (0.15, None),
            ...     'warning': (0.10, 0.15),
            ...     'bad': (None, 0.10)
            ... }
            >>> coder.traffic_light(0.18, thresholds)
            '<span style="color: #28a745;">0.18</span>'
        """
        if self._is_missing(value):
            return "-"

        display_text = text if text is not None else str(value)

        color_key = "bad"  # Fallback when no range matches
        for key, (lower, upper) in thresholds.items():
            if lower is None and upper is None:
                continue
            above_lower = lower is None or value >= lower
            below_upper = upper is None or value <= upper
            if above_lower and below_upper:
                color_key = key
                break

        return self._apply_color(display_text, self.color_scheme.get(color_key, "#000000"))

    def heatmap(
        self,
        value: Union[float, int],
        min_val: float,
        max_val: float,
        text: Optional[str] = None,
    ) -> str:
        """Apply heatmap background coloring based on value range.

        The value is normalized to [0, 1] over ``min_val``..``max_val``
        (clamped at both ends) and mapped onto five equal-width buckets.
        When the range is degenerate (``min_val == max_val``) the middle
        bucket is used.

        Args:
            value: Numeric value to color.
            min_val: Minimum value in range.
            max_val: Maximum value in range.
            text: Text to display (uses value if None).

        Returns:
            Formatted string with heatmap coloring, or "-" for None/NaN.

        Examples:
            >>> coder = ColorCoder(output_format="html")
            >>> coder.heatmap(50, 0, 100)
            '<span style="background-color: #42a5f5; padding: 2px 4px;">50</span>'
        """
        if self._is_missing(value):
            return "-"

        display_text = text if text is not None else str(value)

        if max_val == min_val:
            normalized = 0.5
        else:
            normalized = (value - min_val) / (max_val - min_val)
            normalized = max(0, min(1, normalized))  # Clamp to [0, 1]

        bucket = "high"
        for upper_bound, name in self._HEATMAP_BUCKETS:
            if normalized < upper_bound:
                bucket = name
                break

        return self._apply_color(
            display_text, self.HEATMAP_COLORS[bucket], is_background=True
        )

    def threshold_color(
        self,
        value: Union[float, int],
        threshold: float,
        above_color: str = "#28a745",
        below_color: str = "#dc3545",
        text: Optional[str] = None,
    ) -> str:
        """Apply binary coloring based on threshold.

        Args:
            value: Numeric value to evaluate.
            threshold: Threshold value.
            above_color: Color for values at or above the threshold.
            below_color: Color for values below the threshold.
            text: Text to display (uses value if None).

        Returns:
            Formatted string with threshold-based coloring, or "-" for None/NaN.
        """
        if self._is_missing(value):
            return "-"

        display_text = text if text is not None else str(value)
        chosen = above_color if value >= threshold else below_color
        return self._apply_color(display_text, chosen)

    @staticmethod
    def _latex_color_spec(color: str) -> str:
        """Return the xcolor argument(s) for ``color``.

        Hex colors ("#rrggbb" or "#rgb") become ``[HTML]{RRGGBB}`` because a
        raw "#" is a special character in LaTeX and is not a valid color
        name; anything else is passed through as a named color.
        """
        color = str(color)
        if not color.startswith("#"):
            return f"{{{color}}}"
        digits = color[1:]
        if len(digits) == 3:
            # Expand CSS shorthand (#abc -> AABBCC); the HTML model needs 6 digits.
            digits = "".join(ch * 2 for ch in digits)
        return f"[HTML]{{{digits.upper()}}}"

    def _apply_color(  # pylint: disable=too-many-return-statements
        self,
        text: str,
        color: str,
        is_background: bool = False,
    ) -> str:
        """Apply color formatting based on output format.

        Args:
            text: Text to format.
            color: Color code (hex or name).
            is_background: Whether to apply as background color.

        Returns:
            Formatted text string; unchanged text for unknown formats.
        """
        if self.output_format == "html":
            if is_background:
                return f'<span style="background-color: {color}; padding: 2px 4px;">{text}</span>'
            return f'<span style="color: {color};">{text}</span>'

        if self.output_format == "latex":
            # Requires the xcolor package in the consuming document.
            command = "\\colorbox" if is_background else "\\textcolor"
            return f"{command}{self._latex_color_spec(color)}{{{text}}}"

        if self.output_format == "terminal":
            # Use Unicode symbols instead of colors for terminal output.
            color_text = str(color)
            if "good" in color_text or "#28a745" in color_text:
                return f"✓ {text}"
            if "warning" in color_text or "#ffc107" in color_text:
                return f"⚠ {text}"
            if "bad" in color_text or "#dc3545" in color_text:
                return f"✗ {text}"
            return text

        return text
[docs]
class TableFormatter:
    """High-level table formatting utilities.

    Combines a NumberFormatter and a ColorCoder to format whole DataFrame
    columns, append a totals row, and attach footnotes to rendered tables.

    Attributes:
        number_formatter: NumberFormatter instance.
        color_coder: ColorCoder instance.
        output_format: Target output format (html, latex, terminal, none).
    """

    # Aggregations accepted by add_totals_row; each is a pandas Series method name.
    _AGGREGATIONS = ("sum", "mean", "median")

    def __init__(
        self,
        output_format: Literal["html", "latex", "terminal", "none"] = "none",
        currency_symbol: str = "$",
        decimal_places: int = 2,
    ):
        """Initialize TableFormatter.

        Args:
            output_format: Target output format.
            currency_symbol: Currency symbol to use.
            decimal_places: Default decimal precision.
        """
        self.number_formatter = NumberFormatter(
            currency_symbol=currency_symbol,
            decimal_places=decimal_places,
        )
        self.color_coder = ColorCoder(output_format=output_format)
        self.output_format = output_format

    def _cell_formatter(self, fmt: Dict[str, Any], series: pd.Series) -> Optional[Any]:
        """Build the per-cell callable for one column's format spec.

        Args:
            fmt: Format specification; ``fmt["type"]`` selects the formatter
                (defaults to "number").
            series: The column being formatted; used only to derive the
                default heatmap range.

        Returns:
            A one-argument callable producing the formatted cell, or None
            when the format type is not recognized.
        """
        fmt_type = fmt.get("type", "number")
        numbers = self.number_formatter
        colors = self.color_coder

        if fmt_type == "currency":
            decimals = fmt.get("decimals")
            abbreviate = fmt.get("abbreviate", False)
            return lambda x: numbers.format_currency(
                x, decimals=decimals, abbreviate=abbreviate
            )

        if fmt_type == "percentage":
            decimals = fmt.get("decimals")
            multiply_by_100 = fmt.get("multiply_by_100", True)
            return lambda x: numbers.format_percentage(
                x, decimals=decimals, multiply_by_100=multiply_by_100
            )

        if fmt_type == "number":
            decimals = fmt.get("decimals")
            scientific = fmt.get("scientific", False)
            abbreviate = fmt.get("abbreviate", False)
            return lambda x: numbers.format_number(
                x, decimals=decimals, scientific=scientific, abbreviate=abbreviate
            )

        if fmt_type == "ratio":
            decimals = fmt.get("decimals", 2)
            return lambda x: numbers.format_ratio(x, decimals=decimals)

        if fmt_type == "traffic_light":
            thresholds = fmt.get("thresholds", {})
            return lambda x: colors.traffic_light(x, thresholds)

        if fmt_type == "heatmap":
            # Fall back to the column's own range when bounds are not given.
            min_val = fmt["min"] if "min" in fmt else series.min()
            max_val = fmt["max"] if "max" in fmt else series.max()
            return lambda x: colors.heatmap(x, min_val, max_val)

        return None

    def format_dataframe(
        self,
        df: pd.DataFrame,
        column_formats: Optional[Dict[str, Dict[str, Any]]] = None,
        row_colors: Optional[Dict[int, str]] = None,
        alternating_rows: bool = False,
    ) -> pd.DataFrame:
        """Apply formatting to entire DataFrame.

        Columns named in ``column_formats`` that are missing from ``df`` and
        format specs with an unrecognized ``type`` are skipped. The input
        DataFrame is not modified.

        Args:
            df: Input DataFrame.
            column_formats: Format specifications per column.
            row_colors: Colors for specific rows. Accepted for interface
                compatibility; not applied by this method.
            alternating_rows: Whether to use alternating row colors. Accepted
                for interface compatibility; not applied by this method.

        Returns:
            Formatted copy of the DataFrame.

        Examples:
            >>> formatter = TableFormatter()
            >>> formats = {
            ...     'Revenue': {'type': 'currency', 'abbreviate': True},
            ...     'Growth': {'type': 'percentage'},
            ...     'Risk': {'type': 'traffic_light', 'thresholds': {...}}
            ... }
            >>> formatted_df = formatter.format_dataframe(df, formats)
        """
        formatted_df = df.copy()
        if not column_formats:
            return formatted_df

        for col, fmt in column_formats.items():
            if col not in formatted_df.columns:
                continue
            cell_formatter = self._cell_formatter(fmt, formatted_df[col])
            if cell_formatter is not None:
                formatted_df[col] = formatted_df[col].apply(cell_formatter)

        return formatted_df

    def add_totals_row(
        self,
        df: pd.DataFrame,
        columns: Optional[List[str]] = None,
        label: str = "Total",
        operation: Literal["sum", "mean", "median"] = "sum",
    ) -> pd.DataFrame:
        """Add a totals row to DataFrame.

        Aggregated columns receive the result of ``operation``. Of the
        remaining columns, the first column of the frame receives ``label``
        and all others receive an empty string. The result is re-indexed
        from 0.

        Args:
            df: Input DataFrame.
            columns: Columns to total (None for all numeric).
            label: Label for totals row.
            operation: Aggregation operation.

        Returns:
            DataFrame with totals row added.

        Raises:
            ValueError: If ``operation`` is not one of sum, mean, median.
        """
        if operation not in self._AGGREGATIONS:
            # Fail loudly rather than emit a totals row full of NaN.
            raise ValueError(f"Unsupported operation: {operation}")

        if columns is None:
            columns = df.select_dtypes(include=[np.number]).columns.tolist()

        totals_row = {}
        for position, col in enumerate(df.columns):
            if col in columns:
                totals_row[col] = getattr(df[col], operation)()
            else:
                totals_row[col] = label if position == 0 else ""

        return pd.concat(
            [df.copy(), pd.DataFrame([totals_row])],
            ignore_index=True,
        )

    def add_footnotes(
        self,
        table_str: str,
        footnotes: List[str],
        output_format: Optional[str] = None,
    ) -> str:
        """Add footnotes to a table string.

        Args:
            table_str: Table string.
            footnotes: List of footnote texts.
            output_format: Output format (uses instance format if None).

        Returns:
            Table with footnotes appended after a newline, or ``table_str``
            unchanged when there are no footnotes.
        """
        if not footnotes:
            return table_str

        output_format = output_format or self.output_format

        if output_format == "html":
            items = "".join(
                f"<sup>{number}</sup> {note}<br/>"
                for number, note in enumerate(footnotes, 1)
            )
            footnote_str = f"<div class='footnotes'><small>{items}</small></div>"
        elif output_format == "latex":
            footnote_str = "".join(f"\\footnote{{{note}}}" for note in footnotes)
        else:
            # Plain-text fallback: a rule followed by numbered notes.
            notes = "".join(
                f"[{number}] {note}\n" for number, note in enumerate(footnotes, 1)
            )
            footnote_str = "\n" + "-" * 40 + "\n" + notes

        return table_str + "\n" + footnote_str
[docs]
def format_for_export(
    df: pd.DataFrame,
    export_format: Literal["csv", "excel", "latex", "html", "markdown"],
    include_index: bool = False,
    **kwargs,
) -> Union[str, None]:
    """Format DataFrame for export to various formats.

    Args:
        df: DataFrame to export.
        export_format: Export format.
        include_index: Whether to include row index.
        **kwargs: Additional format-specific arguments. Recognized extras:
            ``file_path`` (excel), ``caption`` and ``label`` (latex),
            ``table_id`` and ``classes`` (html). Everything else is
            forwarded to the corresponding pandas ``to_*`` method.

    Returns:
        Formatted string, or None for the file-based excel export. The
        excel export writes nothing when ``file_path`` is missing or empty.

    Raises:
        ValueError: If ``export_format`` is not supported.

    Examples:
        >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        >>> csv_str = format_for_export(df, 'csv')
        >>> latex_str = format_for_export(df, 'latex', caption='My Table')
    """
    if export_format == "csv":
        return str(df.to_csv(index=include_index, **kwargs))

    if export_format == "excel":
        # Pop (not get) so file_path is not forwarded again via **kwargs,
        # which to_excel would reject as an unexpected keyword argument.
        file_path = kwargs.pop("file_path", None)
        if file_path:
            df.to_excel(file_path, index=include_index, **kwargs)
        return None

    if export_format == "latex":
        caption = kwargs.pop("caption", None)
        label = kwargs.pop("label", None)
        latex_str = df.to_latex(index=include_index, **kwargs)
        if not (caption or label):
            return str(latex_str)
        # Wrap the tabular in a floating table environment.
        parts = ["\\begin{table}[htbp]\n\\centering\n"]
        if caption:
            parts.append(f"\\caption{{{caption}}}\n")
        if label:
            parts.append(f"\\label{{{label}}}\n")
        parts.append(latex_str)
        parts.append("\\end{table}")
        return "".join(parts)

    if export_format == "html":
        # Let pandas emit id/class itself: this merges custom classes into
        # the single class attribute and cannot touch "<table" text that
        # appears inside cell content.
        table_id = kwargs.pop("table_id", None) or None
        classes = kwargs.pop("classes", None) or None
        return str(
            df.to_html(
                index=include_index, table_id=table_id, classes=classes, **kwargs
            )
        )

    if export_format == "markdown":
        return str(df.to_markdown(index=include_index, **kwargs))

    raise ValueError(f"Unsupported format: {export_format}")