#!/usr/bin/env python3
# Timestamp: 2025-12-20
# File: /home/ywatanabe/proj/scitex-code/src/scitex/fts/_stats/_dataclasses/_Stats.py
"""
Stats - Statistical analysis results with provenance and GUI support.
This module provides the single source of truth for statistical result schemas,
supporting both:
1. Bundle storage (data provenance, test results)
2. GUI annotation (positioning, styling for interactive editing)
"""
import json
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Literal, Optional, Union
# Version string of the stats schema; used as the default of Stats.schema_version.
STATS_VERSION = "1.1.0"
# Type aliases for GUI support
# Allowed values for StatPositioning.mode.
PositionMode = Literal["absolute", "relative_to_plot", "above_whisker", "auto"]
# Allowed values for Position.unit.
UnitType = Literal["mm", "px", "inch", "data"]
# Allowed values for StatStyling.symbol_style.
SymbolStyle = Literal[
    "asterisk", "text", "bracket", "compact", "detailed", "publication"
]
@dataclass
class DataRef:
    """Pointer to the data an analysis was computed from.

    ``path`` is always serialized; ``columns`` and ``filter`` are optional
    and are only written out when they hold a truthy value.
    """

    path: str
    columns: Optional[List[str]] = None
    filter: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form, omitting empty/unset optional fields."""
        optional_fields = (("columns", self.columns), ("filter", self.filter))
        payload: Dict[str, Any] = {"path": self.path}
        # Falsy values (None, [], "") are skipped to keep the output compact.
        payload.update({key: value for key, value in optional_fields if value})
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DataRef":
        """Build a DataRef from a dict; a missing ``path`` becomes ``""``."""
        lookup = data.get
        return cls(
            path=lookup("path", ""),
            columns=lookup("columns"),
            filter=lookup("filter"),
        )
# =============================================================================
# GUI Position and Styling Classes (merged from scitex.schema._stats)
# =============================================================================
@dataclass
class Position:
    """
    Position specification with unit support for GUI integration.

    Supports multiple coordinate systems for flexibility across
    matplotlib (mm), Fabric.js (px), and data coordinates.

    Only "px" and "inch" can be converted to "mm", and only "mm"-convertible
    positions can be converted to "px". Positions in any other unit
    (e.g. "data") are returned unchanged by both converters because no
    physical scale is known for them here.
    """

    x: float
    y: float
    unit: UnitType = "mm"
    relative_to: Optional[str] = None  # Plot ID or "axes"
    offset: Optional[Dict[str, float]] = None  # {"dx": 0, "dy": 0}

    def to_dict(self) -> Dict[str, Any]:
        """Return all five fields as a plain dict (unset ones as ``None``)."""
        return {
            "x": self.x,
            "y": self.y,
            "unit": self.unit,
            "relative_to": self.relative_to,
            "offset": self.offset,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Position":
        """Build a Position from a dict whose keys match the field names.

        Raises:
            TypeError: if ``data`` lacks ``x``/``y`` or has unknown keys.
        """
        return cls(**data)

    def to_mm(self, dpi: float = 300.0) -> "Position":
        """Convert position to mm (for matplotlib).

        Args:
            dpi: Pixels per inch, used only when converting from "px".

        Returns:
            A new Position in mm for "px"/"inch" input; ``self`` unchanged
            when already in mm or when the unit has no mm conversion.
            ``relative_to`` and ``offset`` are carried over as-is (the
            offset values are not rescaled).
        """
        if self.unit == "px":
            scale = 25.4 / dpi  # mm per pixel
        elif self.unit == "inch":
            scale = 25.4  # mm per inch
        else:
            # "mm" needs no work; other units (e.g. "data") cannot be converted.
            return self
        return Position(
            x=self.x * scale,
            y=self.y * scale,
            unit="mm",
            relative_to=self.relative_to,
            offset=self.offset,
        )

    def to_px(self, dpi: float = 300.0) -> "Position":
        """Convert position to px (for Fabric.js canvas).

        Args:
            dpi: Pixels per inch used for the mm <-> px scale.

        Returns:
            A new Position in px for "mm"/"inch" input; ``self`` unchanged
            when already in px or when the unit has no mm conversion.
            ``relative_to`` and ``offset`` are carried over as-is (the
            offset values are not rescaled).
        """
        if self.unit == "px":
            # Mirror to_mm: no conversion needed, and skipping the
            # px -> mm -> px round trip avoids floating-point drift.
            return self
        mm_pos = self.to_mm(dpi)
        if mm_pos.unit != "mm":
            # to_mm could not convert (e.g. "data" coordinates); scaling the
            # raw numbers and labelling them "px" would be wrong.
            return self
        px_per_mm = dpi / 25.4
        return Position(
            x=mm_pos.x * px_per_mm,
            y=mm_pos.y * px_per_mm,
            unit="px",
            relative_to=self.relative_to,
            offset=self.offset,
        )
@dataclass
class StatStyling:
    """Visual settings used when a statistical annotation is rendered."""

    font_size_pt: float = 7.0
    font_family: str = "Arial"
    color: str = "#000000"
    symbol_style: SymbolStyle = "asterisk"
    line_width_mm: Optional[float] = None
    bracket_height_mm: Optional[float] = None
    theme: Literal["light", "dark", "auto"] = "auto"

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dict, including unset (None) ones."""
        serialized = asdict(self)
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StatStyling":
        """Build a StatStyling from a dict whose keys match the field names."""
        styling = cls(**data)
        return styling

    def get_theme_color(self, is_dark: bool = False) -> str:
        """Return white for a dark theme and black otherwise.

        With ``theme == "auto"`` the caller-supplied ``is_dark`` decides;
        for any other theme value ``is_dark`` is ignored. The ``color``
        field is not consulted.
        """
        if self.theme == "auto":
            use_dark = is_dark
        else:
            use_dark = self.theme == "dark"
        return "#ffffff" if use_dark else "#000000"
@dataclass
class StatPositioning:
    """Placement settings for an annotation, ready for GUI consumption."""

    mode: PositionMode = "auto"
    position: Optional[Position] = None
    preferred_corner: Optional[str] = None  # "top-right", "bottom-left", etc.
    avoid_overlap: bool = True
    min_distance_mm: float = 2.0
    anchor_to: Optional[str] = None  # "plot_center", "whisker_top", etc.

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dict; ``position`` is nested or None."""
        nested_position = None
        if self.position:
            nested_position = self.position.to_dict()
        return {
            "mode": self.mode,
            "position": nested_position,
            "preferred_corner": self.preferred_corner,
            "avoid_overlap": self.avoid_overlap,
            "min_distance_mm": self.min_distance_mm,
            "anchor_to": self.anchor_to,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StatPositioning":
        """Build a StatPositioning from a dict keyed by field name.

        A truthy ``position`` entry is rebuilt into a Position; the input
        dict itself is left untouched.
        """
        kwargs = data.copy()  # shallow copy so the caller's dict is not mutated
        raw_position = kwargs.get("position")
        if raw_position:
            kwargs["position"] = Position.from_dict(raw_position)
        return cls(**kwargs)
@dataclass
class EffectSize:
    """A named effect-size value with optional confidence-interval bounds."""

    name: str  # cohens_d, hedges_g, eta_squared, r_squared, etc.
    value: float
    ci_lower: Optional[float] = None
    ci_upper: Optional[float] = None
    ci_level: float = 0.95

    def to_dict(self) -> Dict[str, Any]:
        """Return a compact dict.

        CI bounds are written only when set, and ``ci_level`` only when it
        differs from the 0.95 default.
        """
        payload: Dict[str, Any] = {"name": self.name, "value": self.value}
        for key in ("ci_lower", "ci_upper"):
            bound = getattr(self, key)
            if bound is not None:
                payload[key] = bound
        if self.ci_level != 0.95:
            payload["ci_level"] = self.ci_level
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EffectSize":
        """Build an EffectSize from a dict, defaulting any missing key."""
        lookup = data.get
        return cls(
            name=lookup("name", ""),
            value=lookup("value", 0.0),
            ci_lower=lookup("ci_lower"),
            ci_upper=lookup("ci_upper"),
            ci_level=lookup("ci_level", 0.95),
        )
@dataclass
class StatMethod:
    """Description of the statistical method that was applied."""

    name: str  # t-test, anova, chi-squared, correlation, etc.
    variant: Optional[str] = None  # independent, paired, one-way, etc.
    parameters: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a compact dict; empty ``variant``/``parameters`` are omitted."""
        payload: Dict[str, Any] = {"name": self.name}
        extras = (("variant", self.variant), ("parameters", self.parameters))
        payload.update({key: value for key, value in extras if value})
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StatMethod":
        """Build a StatMethod from a dict, defaulting any missing key."""
        lookup = data.get
        return cls(
            name=lookup("name", ""),
            variant=lookup("variant"),
            parameters=lookup("parameters", {}),
        )
@dataclass
class StatResult:
    """Statistical test result.

    Holds the test statistic, its p-value and optional extras (degrees of
    freedom, effect size, significance flag, alpha level).
    """

    statistic: float
    statistic_name: str  # t, F, chi2, r, etc.
    p_value: float
    df: Optional[float] = None
    effect_size: Optional[EffectSize] = None
    significant: Optional[bool] = None
    alpha: float = 0.05

    def to_dict(self) -> Dict[str, Any]:
        """Return a compact dict.

        The three required fields are always present. Optional fields are
        written only when set, and ``alpha`` only when it differs from the
        0.05 default.
        """
        result = {
            "statistic": self.statistic,
            "statistic_name": self.statistic_name,
            "p_value": self.p_value,
        }
        if self.df is not None:
            result["df"] = self.df
        if self.effect_size is not None:
            result["effect_size"] = self.effect_size.to_dict()
        if self.significant is not None:
            result["significant"] = self.significant
        if self.alpha != 0.05:
            result["alpha"] = self.alpha
        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StatResult":
        """Build a StatResult from a dict, defaulting any missing key.

        An ``effect_size`` entry that is absent or explicitly ``None``
        (JSON ``null``) yields ``effect_size=None``; any other value is
        handed to ``EffectSize.from_dict``.
        """
        raw_effect_size = data.get("effect_size")
        # Checking for None (not just key presence) keeps a serialized
        # ``"effect_size": null`` from crashing inside EffectSize.from_dict.
        effect_size = (
            EffectSize.from_dict(raw_effect_size)
            if raw_effect_size is not None
            else None
        )
        return cls(
            statistic=data.get("statistic", 0.0),
            statistic_name=data.get("statistic_name", ""),
            p_value=data.get("p_value", 1.0),
            df=data.get("df"),
            effect_size=effect_size,
            significant=data.get("significant"),
            alpha=data.get("alpha", 0.05),
        )
@dataclass
class StatDisplay:
    """Display options for a statistical result, including GUI settings."""

    show_stars: bool = True
    show_p_value: bool = True
    show_effect_size: bool = False
    bracket_groups: Optional[List[str]] = None
    # Enhanced GUI support
    positioning: Optional[StatPositioning] = None
    styling: Optional[StatStyling] = None
    plot_id: Optional[str] = None  # Associated plot in TrackingMixin

    def to_dict(self) -> Dict[str, Any]:
        """Return a compact dict.

        The three ``show_*`` flags are always written; the remaining fields
        are written only when they hold a truthy value.
        """
        payload: Dict[str, Any] = {
            "show_stars": self.show_stars,
            "show_p_value": self.show_p_value,
            "show_effect_size": self.show_effect_size,
        }
        if self.bracket_groups:
            payload["bracket_groups"] = self.bracket_groups
        # Nested objects are serialized through their own to_dict.
        for key, nested in (("positioning", self.positioning), ("styling", self.styling)):
            if nested:
                payload[key] = nested.to_dict()
        if self.plot_id:
            payload["plot_id"] = self.plot_id
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StatDisplay":
        """Build a StatDisplay from a dict, defaulting any missing key.

        ``positioning`` and ``styling`` are rebuilt into their classes only
        when present and truthy; otherwise they stay ``None``.
        """
        raw_positioning = data["positioning"] if "positioning" in data else None
        raw_styling = data["styling"] if "styling" in data else None
        return cls(
            show_stars=data.get("show_stars", True),
            show_p_value=data.get("show_p_value", True),
            show_effect_size=data.get("show_effect_size", False),
            bracket_groups=data.get("bracket_groups"),
            positioning=(
                StatPositioning.from_dict(raw_positioning) if raw_positioning else None
            ),
            styling=StatStyling.from_dict(raw_styling) if raw_styling else None,
            plot_id=data.get("plot_id"),
        )
@dataclass
class Analysis:
    """Complete analysis record with provenance.

    Groups, under one ``result_id``, the method that was run, its result,
    references to the data it used, free-form inputs, and an optional
    display configuration.
    """

    result_id: str
    method: StatMethod
    results: StatResult
    data_refs: List[DataRef] = field(default_factory=list)
    inputs: Dict[str, Any] = field(default_factory=dict)
    display: Optional[StatDisplay] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a compact dict.

        ``result_id``, ``method`` and ``results`` are always written;
        ``data_refs``, ``inputs`` and ``display`` only when non-empty/set.
        """
        result = {
            "result_id": self.result_id,
            "method": self.method.to_dict(),
            "results": self.results.to_dict(),
        }
        if self.data_refs:
            result["data_refs"] = [d.to_dict() for d in self.data_refs]
        if self.inputs:
            result["inputs"] = self.inputs
        if self.display:
            result["display"] = self.display.to_dict()
        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Analysis":
        """Build an Analysis from a dict, defaulting any missing key.

        Keys that are absent and keys that are explicitly ``None``
        (JSON ``null``) are treated the same way: ``display`` becomes
        ``None`` and the other nested values fall back to empty containers.
        """
        raw_display = data.get("display")
        # A present-but-null "display" must not be passed on: the nested
        # from_dict performs membership tests that fail on None.
        display = (
            StatDisplay.from_dict(raw_display) if raw_display is not None else None
        )
        return cls(
            result_id=data.get("result_id", ""),
            method=StatMethod.from_dict(data.get("method") or {}),
            results=StatResult.from_dict(data.get("results") or {}),
            data_refs=[DataRef.from_dict(d) for d in data.get("data_refs") or []],
            inputs=data.get("inputs") or {},
            display=display,
        )
@dataclass
class Stats:
    """Complete statistics specification for a bundle.

    Stored in stats/stats.json.

    NOTE(review): ``schema_name`` and ``schema_version`` are not written by
    ``to_dict`` nor read by ``from_dict`` -- confirm this is intended.
    """

    analyses: List[Analysis] = field(default_factory=list)
    software: Dict[str, str] = field(default_factory=dict)
    # Schema metadata
    schema_name: str = "fsb.stats"
    schema_version: str = STATS_VERSION

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict with ``analyses`` and, if non-empty, ``software``."""
        result = {"analyses": [a.to_dict() for a in self.analyses]}
        if self.software:
            result["software"] = self.software
        return result

    def to_json(self, indent: int = 2) -> str:
        """Return ``to_dict()`` encoded as a JSON string.

        Args:
            indent: Indentation width passed to ``json.dumps``.
        """
        return json.dumps(self.to_dict(), indent=indent)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Stats":
        """Build a Stats object from a dict.

        Missing or ``None`` ``analyses``/``software`` entries become an
        empty list/dict respectively.
        """
        return cls(
            analyses=[Analysis.from_dict(a) for a in data.get("analyses") or []],
            software=data.get("software") or {},
        )

    @classmethod
    def from_json(cls, json_str: str) -> "Stats":
        """Parse a JSON string and build a Stats object from the result."""
        return cls.from_dict(json.loads(json_str))
# Names exported by ``from <module> import *``.
__all__ = [
    # Version
    "STATS_VERSION",
    # Type aliases
    "PositionMode",
    "UnitType",
    "SymbolStyle",
    # Data reference
    "DataRef",
    # GUI classes (merged from scitex.schema._stats)
    "Position",
    "StatStyling",
    "StatPositioning",
    # Core stats classes
    "EffectSize",
    "StatMethod",
    "StatResult",
    "StatDisplay",
    "Analysis",
    "Stats",
]
# EOF