#!/usr/bin/env python3
# Timestamp: "2025-10-01 16:30:00 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_mcnemar.py
# ----------------------------------------
r"""McNemar's test for paired categorical data.
Functionalities:
- McNemar's test for paired categorical data
- Test for marginal homogeneity in 2×2 contingency tables
- Compute odds ratio for matched pairs
- Generate visualizations for paired outcomes
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: 2×2 contingency table (array or DataFrame)
- output: Test results (dict or DataFrame) and optional figure
"""
from __future__ import annotations
import os
from typing import Literal, Optional, Union
import matplotlib.axes
import numpy as np
import pandas as pd
import matplotlib.pyplot as _mpl_plt # noqa: E402
from scipy import stats
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars
from scitex_stats._utils._normalizers import convert_results, force_dataframe
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
logger = getLogger(__name__)
def mcnemar_odds_ratio(b: int, c: int) -> float:
"""
Compute odds ratio for McNemar's test from discordant pairs.
Parameters
----------
b : int
Count in cell [0,1] (success before, failure after)
c : int
Count in cell [1,0] (failure before, success after)
Returns
-------
or_val : float
Odds ratio = b / c
Notes
-----
For McNemar's test, OR = b/c where b and c are the discordant pairs.
OR > 1 indicates more b than c (positive change)
OR < 1 indicates more c than b (negative change)
OR = 1 indicates no change
"""
if c == 0:
if b == 0:
return 1.0 # No discordant pairs
return float("inf") # Only b discordant pairs
return float(b / c)
def interpret_mcnemar_or(or_val: float) -> str:
"""
Interpret McNemar's odds ratio.
Parameters
----------
or_val : float
Odds ratio
Returns
-------
interpretation : str
Interpretation
"""
if or_val == 1.0:
return "no change"
elif or_val > 1.0:
if or_val < 2.0:
return "small increase"
elif or_val < 4.0:
return "medium increase"
else:
return "large increase"
else: # or_val < 1.0
if or_val > 0.5:
return "small decrease"
elif or_val > 0.25:
return "medium decrease"
else:
return "large decrease"
[docs]
def test_mcnemar( # noqa: C901
observed: Union[np.ndarray, pd.DataFrame, list],
var_before: Optional[str] = None,
var_after: Optional[str] = None,
correction: bool = True,
alpha: float = 0.05,
plot: bool = False,
ax: Optional[matplotlib.axes.Axes] = None,
return_as: Literal["dict", "dataframe"] = "dict",
decimals: int = 3,
verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
r"""
Perform McNemar's test for paired categorical data.
Tests whether there is a significant change in proportions for paired binary data.
Appropriate for before-after studies with binary outcomes.
Parameters
----------
observed : array-like, shape (2, 2)
2×2 contingency table:
[[a, b],
[c, d]]
where:
- a: both conditions negative (0,0)
- b: before negative, after positive (0,1)
- c: before positive, after negative (1,0)
- d: both conditions positive (1,1)
var_before : str, optional
Name for before condition
var_after : str, optional
Name for after condition
correction : bool, default True
Whether to apply continuity correction (recommended for small samples)
alpha : float, default 0.05
Significance level
plot : bool, default False
Whether to generate visualization
ax : matplotlib.axes.Axes, optional
Axes to plot on. If provided, plot is set to True
return_as : {'dict', 'dataframe'}, default 'dict'
Output format
decimals : int, default 3
Number of decimal places for rounding
verbose : bool, default False
If True, print test results to logger
Returns
-------
result : dict or DataFrame
Test results including:
- test_method: Name of test
- statistic: χ² statistic
- pvalue: p-value
- df: degrees of freedom (always 1)
- b: count of (before=0, after=1)
- c: count of (before=1, after=0)
- odds_ratio: b / c
- effect_size: odds ratio
- effect_size_interpretation: interpretation
- significant: whether to reject null hypothesis
- stars: significance stars
Notes
-----
McNemar's test is used for paired nominal data, testing whether row and column
marginal frequencies are equal (marginal homogeneity).
The test statistic is based on the discordant pairs (b and c):
.. math::
\\chi^2 = \\frac{(b - c)^2}{b + c} \\quad \\text{(without correction)}
.. math::
\\chi^2 = \\frac{(|b - c| - 1)^2}{b + c} \\quad \\text{(with correction)}
**Null hypothesis:** The marginal proportions are equal (no change)
**Alternative:** The marginal proportions differ (significant change)
**Assumptions:**
- Paired data (matched observations)
- Binary outcomes for both conditions
- Large enough sample (b + c ≥ 10 recommended for chi-square approximation)
**Effect size (Odds Ratio):**
OR = b / c
- OR = 1: no change
- OR > 1: increase (more transitions from 0→1 than 1→0)
- OR < 1: decrease (more transitions from 1→0 than 0→1)
Examples
--------
>>> import numpy as np
>>> from scitex_stats.tests.categorical import test_mcnemar
>>>
>>> # Example: Treatment effectiveness (before/after)
>>> # Rows: before, Columns: after
>>> # [[no→no, no→yes],
>>> # [yes→no, yes→yes]]
>>> observed = [[59, 6], # 59 stayed negative, 6 improved
... [16, 19]] # 16 relapsed, 19 stayed positive
>>>
>>> result = test_mcnemar(observed, var_before='Before Treatment',
... var_after='After Treatment', plot=True)
>>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
>>> print(f"Odds Ratio = {result['odds_ratio']:.2f}")
References
----------
.. [1] McNemar, Q. (1947). "Note on the sampling error of the difference between
correlated proportions or percentages". Psychometrika, 12(2), 153-157.
.. [2] Edwards, A. L. (1948). "Note on the correction for continuity in testing
the significance of the difference between correlated proportions".
Psychometrika, 13(3), 185-187.
See Also
--------
test_chi2 : For independent (unpaired) categorical data
test_fisher : For 2×2 tables with small expected frequencies
"""
# Convert to numpy array
if isinstance(observed, pd.DataFrame):
observed_array = observed.values
elif isinstance(observed, list):
observed_array = np.array(observed)
else:
observed_array = np.asarray(observed)
# Validate shape
if observed_array.shape != (2, 2):
raise ValueError(
f"McNemar's test requires a 2×2 table, got shape {observed_array.shape}"
)
# Extract cells
a, b = observed_array[0]
c, d = observed_array[1]
# Validate data types
if not all(
isinstance(x, (int, np.integer))
or (isinstance(x, (float, np.floating)) and x == int(x)) # type: ignore[unreachable]
for x in [a, b, c, d]
):
raise ValueError("All values must be non-negative integers (counts)")
if any(x < 0 for x in [a, b, c, d]):
raise ValueError("All counts must be non-negative")
a, b, c, d = int(a), int(b), int(c), int(d)
# Perform McNemar's test
# scipy.stats.contingency.mcnemar is not available in older scipy
# We'll compute manually
n_discordant = b + c
if n_discordant == 0:
# No discordant pairs - perfect agreement
statistic = 0.0
pvalue = 1.0
else:
if correction:
# With continuity correction
statistic = (abs(b - c) - 1.0) ** 2 / n_discordant
else:
# Without continuity correction
statistic = (b - c) ** 2 / n_discordant
# p-value from chi-square distribution with df=1
pvalue = 1.0 - stats.chi2.cdf(statistic, df=1)
# Compute odds ratio
odds_ratio = mcnemar_odds_ratio(b, c)
or_interpretation = interpret_mcnemar_or(odds_ratio)
# Variable names
var_before = var_before or "Before"
var_after = var_after or "After"
# Build result dictionary
result = {
"test_method": "McNemar's test",
"var_before": var_before,
"var_after": var_after,
"statistic": round(float(statistic), decimals),
"pvalue": round(float(pvalue), decimals + 1),
"df": 1,
"b": int(b), # Changed (0→1)
"c": int(c), # Changed (1→0)
"n_discordant": int(n_discordant),
"odds_ratio": (
round(float(odds_ratio), decimals)
if np.isfinite(odds_ratio)
else odds_ratio
),
"effect_size": (
round(float(odds_ratio), decimals)
if np.isfinite(odds_ratio)
else odds_ratio
),
"effect_size_metric": "Odds ratio",
"effect_size_interpretation": or_interpretation,
"correction": correction,
"alpha": alpha,
"significant": pvalue < alpha,
"stars": p2stars(pvalue),
"H0": "Marginal proportions are equal (no change)",
}
# Log results if verbose
if verbose:
logger.info(
f"McNemar: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
)
logger.info(f"OR = {odds_ratio:.3f}, {or_interpretation} (b={b}, c={c})")
# Auto-enable plotting if ax is provided
if ax is not None:
plot = True
# Generate plot if requested
if plot:
if ax is None:
fig, axes = _mpl_plt.subplots(1, 3, figsize=(12, 4))
_plot_mcnemar_full(observed_array, result, var_before, var_after, axes)
else:
_plot_mcnemar_simple(observed_array, result, var_before, var_after, ax)
# Convert to requested format
if return_as == "dataframe":
result = force_dataframe(result)
elif return_as not in ["dict", "dataframe"]:
return convert_results(result, return_as=return_as)
return result
def _plot_mcnemar_full(observed, result, var_before, var_after, axes):
"""Create 3-panel visualization for McNemar's test."""
a, b = observed[0]
c, d = observed[1]
# Panel 1: Contingency table heatmap
ax = axes[0]
im = ax.imshow(observed, cmap="Blues", aspect="auto")
# Add text annotations
for i in range(2):
for j in range(2):
ax.text(
j,
i,
int(observed[i, j]),
ha="center",
va="center",
color="black",
)
ax.set_xticks([0, 1])
ax.set_yticks([0, 1])
ax.set_xticklabels(["0", "1"])
ax.set_yticklabels(["0", "1"])
ax.set_xlabel(var_after)
ax.set_ylabel(var_before)
ax.set_title("Contingency Table")
_mpl_plt.colorbar(im, ax=ax)
# Panel 2: Discordant pairs comparison
ax = axes[1]
categories = ["0→1\n(b)", "1→0\n(c)"]
counts = [b, c]
bars = ax.bar(categories, counts)
# Add count labels
for bar, count in zip(bars, counts):
height = bar.get_height()
ax.text(
bar.get_x() + bar.get_width() / 2.0,
height,
f"{int(count)}",
ha="center",
va="bottom",
)
ax.set_ylabel("Count")
ax.set_title("Discordant Pairs")
ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1)
# Panel 3: McNemar's Test stats
ax = axes[2]
ax.axis("off")
ax.set_title("McNemar's Test")
# Add stats text box
stars_text = result["stars"].replace("ns", "$n$s")
if np.isfinite(result["odds_ratio"]):
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}\n"
f"$OR$ = {result['odds_ratio']:.3f}"
)
else:
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}"
)
ax.text(
0.5,
0.5,
text_str,
transform=ax.transAxes,
fontsize=10,
verticalalignment="center",
horizontalalignment="center",
color="black",
)
def _plot_mcnemar_simple(observed, result, var_before, var_after, ax):
"""Create simplified single-panel discordant pairs plot on given axes."""
a, b = observed[0]
c, d = observed[1]
# Discordant pairs comparison
categories = ["0→1\n(b)", "1→0\n(c)"]
counts = [b, c]
bars = ax.bar(categories, counts)
# Add count labels
for bar, count in zip(bars, counts):
height = bar.get_height()
ax.text(
bar.get_x() + bar.get_width() / 2.0,
height,
f"{int(count)}",
ha="center",
va="bottom",
)
ax.set_ylabel("Count")
ax.set_title("McNemar's Test")
ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1)
# Add stats text box
stars_text = result["stars"].replace("ns", "$n$s")
if np.isfinite(result["odds_ratio"]):
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}\n"
f"$OR$ = {result['odds_ratio']:.3f}"
)
else:
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}"
)
ax.text(
0.02,
0.98,
text_str,
transform=ax.transAxes,
verticalalignment="top",
color="black",
fontsize=6,
)
# Demo: python -m scitex_stats.tests.categorical._demo_mcnemar
# EOF