# Source code for scitex_stats.tests.nonparametric._test_brunner_munzel

#!/usr/bin/env python3
# Timestamp: "2025-10-01 22:40:43 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_brunner_munzel.py

r"""Brunner-Munzel test (non-parametric alternative to t-test).

Functionalities:
  - Perform Brunner-Munzel test (non-parametric alternative to t-test)
  - Compute both P(X>Y) and Cliff's delta effect sizes
  - Generate visualizations with significance indicators
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Two samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import os
from typing import Literal, Optional, Union

import matplotlib
import matplotlib.axes
import numpy as np
import pandas as pd
from scipy import stats

import matplotlib.pyplot as _mpl_plt  # noqa: E402
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym

# Path of this module file, exactly as provided by ``__file__``.
__FILE__ = __file__
# Directory component of that path (head of ``os.path.split``).
__DIR__ = os.path.split(__FILE__)[0]

# Module-level logger obtained from the project logging helper, keyed by
# this module's ``__name__``.
logger = getLogger(__name__)



# [docs]
def test_brunner_munzel(
    x: Union[np.ndarray, pd.Series, str],
    y: Union[np.ndarray, pd.Series, str],
    var_x: str = "x",
    var_y: str = "y",
    alternative: Literal["two-sided", "greater", "less"] = "two-sided",
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    data: Union[pd.DataFrame, str, None] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform the Brunner-Munzel test for two independent samples.

    NaN values are dropped from each sample independently, the test is run
    with :func:`scipy.stats.brunnermunzel`, and two effect sizes (P(X>Y) and
    Cliff's delta) are attached to the result.

    Parameters
    ----------
    x : array, Series, or str
        First sample. A string is only valid together with ``data``, in
        which case it is resolved as a column name.
    y : array, Series, or str
        Second sample. Same rules as ``x``.
    var_x : str, default 'x'
        Label for first sample
    var_y : str, default 'y'
        Label for second sample
    alternative : {'two-sided', 'greater', 'less'}, default 'two-sided'
        Alternative hypothesis (forwarded unchanged to SciPy):
        - 'two-sided': distributions differ
        - 'greater': x tends to be greater than y
        - 'less': x tends to be less than y
    alpha : float, default 0.05
        Significance level used for the ``significant`` flag
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, a new figure is
        created through pyplot (the figure is not returned).
        If provided, automatically enables plotting.
    data : DataFrame, str, or None, optional
        DataFrame or CSV path. When provided, string values for x/y
        are resolved as column names (seaborn-style).
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format. Only the value 'dataframe' triggers conversion; any
        other value returns the dict.
    verbose : bool, default False
        Whether to log the test results through the module logger

    Returns
    -------
    results : dict or DataFrame
        Test results including:
        - test_method: 'Brunner-Munzel test'
        - statistic: W-statistic value as returned by SciPy
        - stat_symbol: 'BM'
        - alternative: The alternative hypothesis that was tested
        - n_x, n_y: Sample sizes after NaN removal
        - var_x, var_y: Variable labels
        - pvalue: p-value
        - stars: Significance stars
        - alpha: Significance level that was used
        - significant: Whether ``pvalue < alpha``
        - effect_size: P(X > Y) (primary effect size)
        - effect_size_metric: 'P(X>Y)'
        - effect_size_interpretation: Interpretation of P(X>Y)
        - effect_size_secondary: Cliff's delta (secondary effect size)
        - effect_size_secondary_metric: "Cliff's delta"
        - effect_size_secondary_interpretation: Interpretation of delta
        - H0: Null hypothesis description

    Raises
    ------
    TypeError
        If ``x`` or ``y`` is still a string after column resolution, i.e. a
        column name was passed without a ``data`` source able to resolve it.

    Notes
    -----
    The Brunner-Munzel test is a non-parametric test for comparing two independent
    samples. It is more robust than the t-test when:
    - Distributions are non-normal
    - Variances are unequal
    - Sample sizes differ
    - Data contain outliers

    Unlike Mann-Whitney U test, Brunner-Munzel does not assume equal variances
    and provides better control of Type I error rate.

    The test statistic W is approximately t-distributed:

    .. math::
        W = \frac{\hat{p} - 0.5}{\sqrt{\hat{\sigma}^2}}

    where :math:`\hat{p}` estimates the relative effect
    :math:`P(X < Y) + 0.5\,P(X = Y)` and :math:`\hat{\sigma}^2` is its
    estimated variance. With SciPy's sign convention W is therefore positive
    when values in ``y`` tend to exceed those in ``x``, whereas the reported
    primary effect size is P(X > Y).

    **Effect Sizes:**

    1. **P(X > Y)**: Probability that a random value from X exceeds a random
       value from Y. Conventional benchmarks (the label actually returned
       comes from ``interpret_prob_superiority``):
       - 0.50: No effect (chance)
       - 0.56: Small effect
       - 0.64: Medium effect
       - 0.71: Large effect

    2. **Cliff's delta (δ)**: Ranges from -1 to 1, related to P(X>Y) by:
       δ = 2×P(X>Y) - 1 (when ties count as one half). Conventional
       benchmarks (the label actually returned comes from
       ``interpret_cliffs_delta``):
       - |δ| < 0.147: Negligible
       - |δ| < 0.33: Small
       - |δ| < 0.474: Medium
       - |δ| ≥ 0.474: Large

    References
    ----------
    .. [1] Brunner, E., & Munzel, U. (2000). "The nonparametric Behrens-Fisher
           problem: Asymptotic theory and a small-sample approximation".
           Biometrical Journal, 42(1), 17-25.
    .. [2] Neubert, K., & Brunner, E. (2007). "A studentized permutation test
           for the non-parametric Behrens-Fisher problem". Computational
           Statistics & Data Analysis, 51(10), 5192-5204.

    Examples
    --------
    >>> import numpy as np
    >>> x = np.array([1, 2, 3, 4, 5])
    >>> y = np.array([2, 3, 4, 5, 6])
    >>> result = test_brunner_munzel(x, y)
    >>> result['test_method']
    'Brunner-Munzel test'
    >>> result['significant']  # p is well above alpha=0.05 for these data
    False
    >>> p_superiority = result['effect_size']  # P(X > Y)
    >>> delta = result['effect_size_secondary']  # Cliff's delta

    >>> # With auto-created figure
    >>> result = test_brunner_munzel(x, y, plot=True)

    >>> # Plot on existing axes
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> result = test_brunner_munzel(x, y, ax=ax)

    >>> # As DataFrame
    >>> df = test_brunner_munzel(x, y, return_as='dataframe')
    """
    # Resolve column names from DataFrame (seaborn-style data= parameter)
    if data is not None:
        from scitex_stats._utils._csv_support import resolve_columns

        resolved = resolve_columns(data, x=x, y=y)
        x, y = resolved["x"], resolved["y"]

    # A string that survived resolution cannot be used as a numeric sample.
    # Fail here with a readable message instead of letting np.isnan raise an
    # opaque ufunc TypeError further down (same exception type as before).
    for arg_name, arg_value in (("x", x), ("y", y)):
        if isinstance(arg_value, str):
            raise TypeError(
                f"`{arg_name}` must be array-like, got the string {arg_value!r}; "
                "strings are only accepted as column names resolved via `data`."
            )

    from scitex_stats._utils._effect_size import (
        cliffs_delta,
        interpret_cliffs_delta,
        interpret_prob_superiority,
        prob_superiority,
    )
    from scitex_stats._utils._formatters import p2stars
    from scitex_stats._utils._normalizers import force_dataframe

    # Convert to numpy arrays and remove NaN (each sample independently;
    # the samples are unpaired, so lengths may differ afterwards)
    x = np.asarray(x)
    y = np.asarray(y)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    n_x = len(x)
    n_y = len(y)

    # Perform Brunner-Munzel test
    bm_result = stats.brunnermunzel(x, y, alternative=alternative)
    w_stat = float(bm_result.statistic)
    pvalue = float(bm_result.pvalue)

    # Significance stars are needed for both the result and the log line
    stars = p2stars(pvalue)

    # Compute effect sizes
    prob_xy = prob_superiority(x, y)
    delta = cliffs_delta(x, y)

    # Interpretations
    prob_interp = interpret_prob_superiority(prob_xy)
    delta_interp = interpret_cliffs_delta(delta)

    # Create null hypothesis description
    if alternative == "two-sided":
        H0 = f"P({var_x} > {var_y}) = 0.5"
    elif alternative == "greater":
        H0 = f"P({var_x} > {var_y}) ≤ 0.5"
    else:  # less
        H0 = f"P({var_x} > {var_y}) ≥ 0.5"

    # Compile results
    result = {
        "test_method": "Brunner-Munzel test",
        "statistic": w_stat,
        "stat_symbol": "BM",
        "alternative": alternative,
        "n_x": n_x,
        "n_y": n_y,
        "var_x": var_x,
        "var_y": var_y,
        "pvalue": pvalue,
        "stars": stars,
        "alpha": alpha,
        "significant": pvalue < alpha,
        "effect_size": prob_xy,
        "effect_size_metric": "P(X>Y)",
        "effect_size_interpretation": prob_interp,
        "effect_size_secondary": delta,
        "effect_size_secondary_metric": "Cliff's delta",
        "effect_size_secondary_interpretation": delta_interp,
        "H0": H0,
    }

    # Log results if verbose
    if verbose:
        logger.info(
            f"Brunner-Munzel: W = {w_stat:.3f}, p = {pvalue:.4f} {stars}"
        )
        logger.info(
            f"P(X>Y) = {prob_xy:.3f} ({prob_interp}), Cliff's δ = {delta:.3f} ({delta_interp})"
        )

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            # The new figure is owned by pyplot; only the axes are used here.
            _fig, ax = _mpl_plt.subplots()
        _plot_brunner_munzel(x, y, var_x, var_y, result, ax)

    # Convert to requested format
    if return_as == "dataframe":
        result = force_dataframe(result)

    return result



def _plot_brunner_munzel(x, y, var_x, var_y, result, ax):
    """Draw the Brunner-Munzel summary plot for two samples onto ``ax``.

    The two samples are passed to the ``violin_swarm`` helper at positions
    0 and 1 with ``var_x`` / ``var_y`` as labels, a significance bracket
    carrying ``result["stars"]`` is added between them, and a text box
    lists W, the p-value, P(X>Y) and delta taken from ``result``.
    Nothing is returned; ``ax`` is modified in place.
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    groups = (x, y)

    # Distribution panel first, then the bracket spanning both positions.
    violin_swarm(ax, list(groups), [0, 1], [var_x, var_y])
    stars = result["stars"]
    significance_bracket(ax, 0, 1, stars, list(groups))

    # Assemble the annotation lines in display order.
    w_line = fmt_stat("W", result["statistic"])
    p_line = fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"])
    sym_x = fmt_sym("X")
    sym_y = fmt_sym("Y")
    prob_line = f"P({sym_x}>{sym_y}) = {result['effect_size']:.3f}"
    delta_line = fmt_stat("delta", result["effect_size_secondary"])

    stats_text_box(ax, [w_line, p_line, prob_line, delta_line])

    ax.set_title("Brunner-Munzel Test")


# EOF