#!/usr/bin/env python3
# Timestamp: "2025-10-01 22:40:43 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_brunner_munzel.py
r"""Brunner-Munzel test (non-parametric alternative to t-test).
Functionalities:
- Perform Brunner-Munzel test (non-parametric alternative to t-test)
- Compute both P(X>Y) and Cliff's delta effect sizes
- Generate visualizations with significance indicators
- Support flexible output formats (dict or DataFrame)
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: Two samples (arrays or Series)
- output: Test results (dict or DataFrame) and optional figure
"""
from __future__ import annotations
import os
from typing import Literal, Optional, Union
import matplotlib
import matplotlib.axes
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as _mpl_plt # noqa: E402
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
logger = getLogger(__name__)
[docs]
def test_brunner_munzel(
x: Union[np.ndarray, pd.Series, str],
y: Union[np.ndarray, pd.Series, str],
var_x: str = "x",
var_y: str = "y",
alternative: Literal["two-sided", "greater", "less"] = "two-sided",
alpha: float = 0.05,
plot: bool = False,
ax: Optional[matplotlib.axes.Axes] = None,
data: Union[pd.DataFrame, str, None] = None,
return_as: Literal["dict", "dataframe"] = "dict",
verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
r"""
Perform Brunner-Munzel test (non-parametric).
Parameters
----------
x : array or Series
First sample
y : array or Series
Second sample
var_x : str, default 'x'
Label for first sample
var_y : str, default 'y'
Label for second sample
alternative : {'two-sided', 'greater', 'less'}, default 'two-sided'
Alternative hypothesis:
- 'two-sided': distributions differ
- 'greater': x tends to be greater than y
- 'less': x tends to be less than y
alpha : float, default 0.05
Significance level
plot : bool, default False
Whether to generate visualization
ax : matplotlib.axes.Axes, optional
Axes object to plot on. If None and plot=True, creates new figure.
If provided, automatically enables plotting.
data : DataFrame, str, or None, optional
DataFrame or CSV path. When provided, string values for x/y
are resolved as column names (seaborn-style).
return_as : {'dict', 'dataframe'}, default 'dict'
Output format
verbose : bool, default False
Whether to print test results
Returns
-------
results : dict or DataFrame
Test results including:
- test_method: 'Brunner-Munzel test'
- statistic_name: 'W'
- statistic: W-statistic value
- pvalue: p-value
- stars: Significance stars
- rejected: Whether null hypothesis is rejected
- effect_size: P(X > Y) (primary effect size)
- effect_size_metric: 'P(X>Y)'
- effect_size_interpretation: Interpretation of P(X>Y)
- effect_size_secondary: Cliff's delta (secondary effect size)
- effect_size_secondary_metric: "Cliff's delta"
- effect_size_secondary_interpretation: Interpretation of delta
- n_x, n_y: Sample sizes
- var_x, var_y: Variable labels
- H0: Null hypothesis description
Notes
-----
The Brunner-Munzel test is a non-parametric test for comparing two independent
samples. It is more robust than the t-test when:
- Distributions are non-normal
- Variances are unequal
- Sample sizes differ
- Data contain outliers
Unlike Mann-Whitney U test, Brunner-Munzel does not assume equal variances
and provides better control of Type I error rate.
The test statistic W is approximately t-distributed:
.. math::
W = \frac{\hat{p} - 0.5}{\sqrt{\hat{\sigma}^2}}
where :math:`\hat{p}` is an estimate of P(X > Y).
**Effect Sizes:**
1. **P(X > Y)**: Probability that a random value from X exceeds a random
value from Y. Interpretation:
- 0.50: No effect (chance)
- 0.56: Small effect
- 0.64: Medium effect
- 0.71: Large effect
2. **Cliff's delta (δ)**: Ranges from -1 to 1, related to P(X>Y) by:
δ = 2×P(X>Y) - 1. Interpretation:
- |δ| < 0.147: Negligible
- |δ| < 0.33: Small
- |δ| < 0.474: Medium
- |δ| ≥ 0.474: Large
References
----------
.. [1] Brunner, E., & Munzel, U. (2000). "The nonparametric Behrens-Fisher
problem: Asymptotic theory and a small-sample approximation".
Biometrical Journal, 42(1), 17-25.
.. [2] Neubert, K., & Brunner, E. (2007). "A studentized permutation test
for the non-parametric Behrens-Fisher problem". Computational
Statistics & Data Analysis, 51(10), 5192-5204.
Examples
--------
>>> x = np.array([1, 2, 3, 4, 5])
>>> y = np.array([2, 3, 4, 5, 6])
>>> result = test_brunner_munzel(x, y)
>>> result['pvalue']
0.109...
>>> result['effect_size'] # P(X > Y)
0.2
>>> result['effect_size_secondary'] # Cliff's delta
-0.6
>>> # With auto-created figure
>>> result = test_brunner_munzel(x, y, plot=True)
>>> # Plot on existing axes
>>> fig, ax = plt.subplots()
>>> result = test_brunner_munzel(x, y, ax=ax)
>>> # As DataFrame
>>> df = test_brunner_munzel(x, y, return_as='dataframe')
"""
# Resolve column names from DataFrame (seaborn-style data= parameter)
if data is not None:
from scitex_stats._utils._csv_support import resolve_columns
resolved = resolve_columns(data, x=x, y=y)
x, y = resolved["x"], resolved["y"]
from scitex_stats._utils._effect_size import (
cliffs_delta,
interpret_cliffs_delta,
interpret_prob_superiority,
prob_superiority,
)
from scitex_stats._utils._formatters import p2stars
from scitex_stats._utils._normalizers import force_dataframe
# Convert to numpy arrays and remove NaN
x = np.asarray(x)
y = np.asarray(y)
x = x[~np.isnan(x)]
y = y[~np.isnan(y)]
n_x = len(x)
n_y = len(y)
# Perform Brunner-Munzel test
bm_result = stats.brunnermunzel(x, y, alternative=alternative)
w_stat = float(bm_result.statistic)
pvalue = float(bm_result.pvalue)
# Compute effect sizes
prob_xy = prob_superiority(x, y)
delta = cliffs_delta(x, y)
# Interpretations
prob_interp = interpret_prob_superiority(prob_xy)
delta_interp = interpret_cliffs_delta(delta)
# Create null hypothesis description
if alternative == "two-sided":
H0 = f"P({var_x} > {var_y}) = 0.5"
elif alternative == "greater":
H0 = f"P({var_x} > {var_y}) ≤ 0.5"
else: # less
H0 = f"P({var_x} > {var_y}) ≥ 0.5"
# Compile results
result = {
"test_method": "Brunner-Munzel test",
"statistic": w_stat,
"stat_symbol": "BM",
"alternative": alternative,
"n_x": n_x,
"n_y": n_y,
"var_x": var_x,
"var_y": var_y,
"pvalue": pvalue,
"stars": p2stars(pvalue),
"alpha": alpha,
"significant": pvalue < alpha,
"effect_size": prob_xy,
"effect_size_metric": "P(X>Y)",
"effect_size_interpretation": prob_interp,
"effect_size_secondary": delta,
"effect_size_secondary_metric": "Cliff's delta",
"effect_size_secondary_interpretation": delta_interp,
"H0": H0,
}
# Log results if verbose
if verbose:
logger.info(
f"Brunner-Munzel: W = {w_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
)
logger.info(
f"P(X>Y) = {prob_xy:.3f} ({prob_interp}), Cliff's δ = {delta:.3f} ({delta_interp})"
)
# Auto-enable plotting if ax is provided
if ax is not None:
plot = True
# Generate plot if requested
if plot:
if ax is None:
_fig, ax = _mpl_plt.subplots()
_plot_brunner_munzel(x, y, var_x, var_y, result, ax)
# Convert to requested format
if return_as == "dataframe":
result = force_dataframe(result)
return result
def _plot_brunner_munzel(x, y, var_x, var_y, result, ax):
"""Create violin+swarm visualization for Brunner-Munzel test on given axes."""
from scitex_stats._plot_helpers import (
significance_bracket,
stats_text_box,
violin_swarm,
)
violin_swarm(ax, [x, y], [0, 1], [var_x, var_y])
significance_bracket(ax, 0, 1, result["stars"], [x, y])
stats_text_box(
ax,
[
fmt_stat("W", result["statistic"]),
fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]),
f"P({fmt_sym('X')}>{fmt_sym('Y')}) = {result['effect_size']:.3f}",
fmt_stat("delta", result["effect_size_secondary"]),
],
)
ax.set_title("Brunner-Munzel Test")
# EOF