Source code for scitex_stats.tests.categorical._test_mcnemar

#!/usr/bin/env python3
# Timestamp: "2025-10-01 16:30:00 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_mcnemar.py
# ----------------------------------------

r"""McNemar's test for paired categorical data.

Functionalities:
  - McNemar's test for paired categorical data
  - Test for marginal homogeneity in 2×2 contingency tables
  - Compute odds ratio for matched pairs
  - Generate visualizations for paired outcomes

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: 2×2 contingency table (array or DataFrame)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import os
from typing import Literal, Optional, Union

import matplotlib.axes
import numpy as np
import pandas as pd
import matplotlib.pyplot as _mpl_plt  # noqa: E402
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars
from scitex_stats._utils._normalizers import convert_results, force_dataframe

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


def mcnemar_odds_ratio(b: int, c: int) -> float:
    """
    Compute odds ratio for McNemar's test from discordant pairs.

    Parameters
    ----------
    b : int
        Count in cell [0,1] (success before, failure after)
    c : int
        Count in cell [1,0] (failure before, success after)

    Returns
    -------
    or_val : float
        Odds ratio = b / c

    Notes
    -----
    For McNemar's test, OR = b/c where b and c are the discordant pairs.
    OR > 1 indicates more b than c (positive change)
    OR < 1 indicates more c than b (negative change)
    OR = 1 indicates no change
    """
    if c == 0:
        if b == 0:
            return 1.0  # No discordant pairs
        return float("inf")  # Only b discordant pairs
    return float(b / c)


def interpret_mcnemar_or(or_val: float) -> str:
    """
    Interpret McNemar's odds ratio.

    Parameters
    ----------
    or_val : float
        Odds ratio

    Returns
    -------
    interpretation : str
        Interpretation
    """
    if or_val == 1.0:
        return "no change"
    elif or_val > 1.0:
        if or_val < 2.0:
            return "small increase"
        elif or_val < 4.0:
            return "medium increase"
        else:
            return "large increase"
    else:  # or_val < 1.0
        if or_val > 0.5:
            return "small decrease"
        elif or_val > 0.25:
            return "medium decrease"
        else:
            return "large decrease"



[docs]
def test_mcnemar(  # noqa: C901
    observed: Union[np.ndarray, pd.DataFrame, list],
    var_before: Optional[str] = None,
    var_after: Optional[str] = None,
    correction: bool = True,
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform McNemar's test for paired categorical data.

    Tests whether there is a significant change in proportions for paired binary data.
    Appropriate for before-after studies with binary outcomes.

    Parameters
    ----------
    observed : array-like, shape (2, 2)
        2×2 contingency table:
        [[a, b],
         [c, d]]
        where:
        - a: both conditions negative (0,0)
        - b: before negative, after positive (0,1)
        - c: before positive, after negative (1,0)
        - d: both conditions positive (1,1)
    var_before : str, optional
        Name for before condition
    var_after : str, optional
        Name for after condition
    correction : bool, default True
        Whether to apply continuity correction (recommended for small samples)
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If provided, plot is set to True
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format
    decimals : int, default 3
        Number of decimal places for rounding
    verbose : bool, default False
        If True, print test results to logger

    Returns
    -------
    result : dict or DataFrame
        Test results including:
        - test_method: Name of test
        - statistic: χ² statistic
        - pvalue: p-value
        - df: degrees of freedom (always 1)
        - b: count of (before=0, after=1)
        - c: count of (before=1, after=0)
        - odds_ratio: b / c
        - effect_size: odds ratio
        - effect_size_interpretation: interpretation
        - significant: whether to reject null hypothesis
        - stars: significance stars

    Notes
    -----
    McNemar's test is used for paired nominal data, testing whether row and column
    marginal frequencies are equal (marginal homogeneity).

    The test statistic is based on the discordant pairs (b and c):

    .. math::
        \\chi^2 = \\frac{(b - c)^2}{b + c}  \\quad \\text{(without correction)}

    .. math::
        \\chi^2 = \\frac{(|b - c| - 1)^2}{b + c}  \\quad \\text{(with correction)}

    **Null hypothesis:** The marginal proportions are equal (no change)
    **Alternative:** The marginal proportions differ (significant change)

    **Assumptions:**
    - Paired data (matched observations)
    - Binary outcomes for both conditions
    - Large enough sample (b + c ≥ 10 recommended for chi-square approximation)

    **Effect size (Odds Ratio):**
    OR = b / c
    - OR = 1: no change
    - OR > 1: increase (more transitions from 0→1 than 1→0)
    - OR < 1: decrease (more transitions from 1→0 than 0→1)

    Examples
    --------
    >>> import numpy as np
    >>> from scitex_stats.tests.categorical import test_mcnemar
    >>>
    >>> # Example: Treatment effectiveness (before/after)
    >>> # Rows: before, Columns: after
    >>> # [[no→no, no→yes],
    >>> #  [yes→no, yes→yes]]
    >>> observed = [[59, 6],   # 59 stayed negative, 6 improved
    ...             [16, 19]]  # 16 relapsed, 19 stayed positive
    >>>
    >>> result = test_mcnemar(observed, var_before='Before Treatment',
    ...                       var_after='After Treatment', plot=True)
    >>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
    >>> print(f"Odds Ratio = {result['odds_ratio']:.2f}")

    References
    ----------
    .. [1] McNemar, Q. (1947). "Note on the sampling error of the difference between
           correlated proportions or percentages". Psychometrika, 12(2), 153-157.
    .. [2] Edwards, A. L. (1948). "Note on the correction for continuity in testing
           the significance of the difference between correlated proportions".
           Psychometrika, 13(3), 185-187.

    See Also
    --------
    test_chi2 : For independent (unpaired) categorical data
    test_fisher : For 2×2 tables with small expected frequencies
    """
    # Convert to numpy array
    if isinstance(observed, pd.DataFrame):
        observed_array = observed.values
    elif isinstance(observed, list):
        observed_array = np.array(observed)
    else:
        observed_array = np.asarray(observed)

    # Validate shape
    if observed_array.shape != (2, 2):
        raise ValueError(
            f"McNemar's test requires a 2×2 table, got shape {observed_array.shape}"
        )

    # Extract cells
    a, b = observed_array[0]
    c, d = observed_array[1]

    # Validate data types
    if not all(
        isinstance(x, (int, np.integer))
        or (isinstance(x, (float, np.floating)) and x == int(x))  # type: ignore[unreachable]
        for x in [a, b, c, d]
    ):
        raise ValueError("All values must be non-negative integers (counts)")

    if any(x < 0 for x in [a, b, c, d]):
        raise ValueError("All counts must be non-negative")

    a, b, c, d = int(a), int(b), int(c), int(d)

    # Perform McNemar's test
    # scipy.stats.contingency.mcnemar is not available in older scipy
    # We'll compute manually
    n_discordant = b + c

    if n_discordant == 0:
        # No discordant pairs - perfect agreement
        statistic = 0.0
        pvalue = 1.0
    else:
        if correction:
            # With continuity correction
            statistic = (abs(b - c) - 1.0) ** 2 / n_discordant
        else:
            # Without continuity correction
            statistic = (b - c) ** 2 / n_discordant

        # p-value from chi-square distribution with df=1
        pvalue = 1.0 - stats.chi2.cdf(statistic, df=1)

    # Compute odds ratio
    odds_ratio = mcnemar_odds_ratio(b, c)
    or_interpretation = interpret_mcnemar_or(odds_ratio)

    # Variable names
    var_before = var_before or "Before"
    var_after = var_after or "After"

    # Build result dictionary
    result = {
        "test_method": "McNemar's test",
        "var_before": var_before,
        "var_after": var_after,
        "statistic": round(float(statistic), decimals),
        "pvalue": round(float(pvalue), decimals + 1),
        "df": 1,
        "b": int(b),  # Changed (0→1)
        "c": int(c),  # Changed (1→0)
        "n_discordant": int(n_discordant),
        "odds_ratio": (
            round(float(odds_ratio), decimals)
            if np.isfinite(odds_ratio)
            else odds_ratio
        ),
        "effect_size": (
            round(float(odds_ratio), decimals)
            if np.isfinite(odds_ratio)
            else odds_ratio
        ),
        "effect_size_metric": "Odds ratio",
        "effect_size_interpretation": or_interpretation,
        "correction": correction,
        "alpha": alpha,
        "significant": pvalue < alpha,
        "stars": p2stars(pvalue),
        "H0": "Marginal proportions are equal (no change)",
    }

    # Log results if verbose
    if verbose:
        logger.info(
            f"McNemar: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"OR = {odds_ratio:.3f}, {or_interpretation} (b={b}, c={c})")

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            fig, axes = _mpl_plt.subplots(1, 3, figsize=(12, 4))
            _plot_mcnemar_full(observed_array, result, var_before, var_after, axes)
        else:
            _plot_mcnemar_simple(observed_array, result, var_before, var_after, ax)

    # Convert to requested format
    if return_as == "dataframe":
        result = force_dataframe(result)
    elif return_as not in ["dict", "dataframe"]:
        return convert_results(result, return_as=return_as)

    return result



def _plot_mcnemar_full(observed, result, var_before, var_after, axes):
    """Create 3-panel visualization for McNemar's test."""
    a, b = observed[0]
    c, d = observed[1]

    # Panel 1: Contingency table heatmap
    ax = axes[0]
    im = ax.imshow(observed, cmap="Blues", aspect="auto")

    # Add text annotations
    for i in range(2):
        for j in range(2):
            ax.text(
                j,
                i,
                int(observed[i, j]),
                ha="center",
                va="center",
                color="black",
            )

    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(["0", "1"])
    ax.set_yticklabels(["0", "1"])
    ax.set_xlabel(var_after)
    ax.set_ylabel(var_before)
    ax.set_title("Contingency Table")
    _mpl_plt.colorbar(im, ax=ax)

    # Panel 2: Discordant pairs comparison
    ax = axes[1]
    categories = ["0→1\n(b)", "1→0\n(c)"]
    counts = [b, c]

    bars = ax.bar(categories, counts)

    # Add count labels
    for bar, count in zip(bars, counts):
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height,
            f"{int(count)}",
            ha="center",
            va="bottom",
        )

    ax.set_ylabel("Count")
    ax.set_title("Discordant Pairs")
    ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1)

    # Panel 3: McNemar's Test stats
    ax = axes[2]
    ax.axis("off")
    ax.set_title("McNemar's Test")

    # Add stats text box
    stars_text = result["stars"].replace("ns", "$n$s")
    if np.isfinite(result["odds_ratio"]):
        text_str = (
            f"$\\chi^2$ = {result['statistic']:.3f}\n"
            f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
            f"$b$ = {result['b']}, $c$ = {result['c']}\n"
            f"$OR$ = {result['odds_ratio']:.3f}"
        )
    else:
        text_str = (
            f"$\\chi^2$ = {result['statistic']:.3f}\n"
            f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
            f"$b$ = {result['b']}, $c$ = {result['c']}"
        )
    ax.text(
        0.5,
        0.5,
        text_str,
        transform=ax.transAxes,
        fontsize=10,
        verticalalignment="center",
        horizontalalignment="center",
        color="black",
    )


def _plot_mcnemar_simple(observed, result, var_before, var_after, ax):
    """Create simplified single-panel discordant pairs plot on given axes."""
    a, b = observed[0]
    c, d = observed[1]

    # Discordant pairs comparison
    categories = ["0→1\n(b)", "1→0\n(c)"]
    counts = [b, c]

    bars = ax.bar(categories, counts)

    # Add count labels
    for bar, count in zip(bars, counts):
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height,
            f"{int(count)}",
            ha="center",
            va="bottom",
        )

    ax.set_ylabel("Count")
    ax.set_title("McNemar's Test")
    ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1)

    # Add stats text box
    stars_text = result["stars"].replace("ns", "$n$s")
    if np.isfinite(result["odds_ratio"]):
        text_str = (
            f"$\\chi^2$ = {result['statistic']:.3f}\n"
            f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
            f"$b$ = {result['b']}, $c$ = {result['c']}\n"
            f"$OR$ = {result['odds_ratio']:.3f}"
        )
    else:
        text_str = (
            f"$\\chi^2$ = {result['statistic']:.3f}\n"
            f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
            f"$b$ = {result['b']}, $c$ = {result['c']}"
        )
    ax.text(
        0.02,
        0.98,
        text_str,
        transform=ax.transAxes,
        verticalalignment="top",
        color="black",
        fontsize=6,
    )


# Demo: python -m scitex_stats.tests.categorical._demo_mcnemar

# EOF