#!/usr/bin/env python3
# Timestamp: "2026-02-12 02:21:50 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_cochran_q.py
r"""Cochran's Q test for binary repeated measures.
Functionalities:
- Perform Cochran's Q test for binary repeated measures
- Extension of McNemar's test to 3+ conditions
- Test for differences in success proportions across conditions
- Generate proportion visualizations
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: Binary data (subjects × conditions)
- output: Test results (dict or DataFrame) and optional figure
"""
from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import pandas as pd
from scipy import stats
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars
__FILE__ = __file__
logger = getLogger(__name__)
try:
import matplotlib.pyplot as plt # noqa: E402
HAS_PLT = True
except ImportError:
HAS_PLT = False
def cochran_q_statistic(data: np.ndarray) -> Tuple[float, int]:
"""
Compute Cochran's Q statistic.
Parameters
----------
data : array, shape (n_subjects, n_conditions)
Binary data (0/1)
Returns
-------
Q : float
Cochran's Q statistic
df : int
Degrees of freedom
Notes
-----
Q follows chi-square distribution with k-1 degrees of freedom.
"""
n, k = data.shape
# Column sums (successes per condition)
G = data.sum(axis=0)
# Row sums (successes per subject)
L = data.sum(axis=1)
# Total successes
N = data.sum()
# Cochran's Q
numerator = (k - 1) * (k * np.sum(G**2) - N**2)
denominator = k * N - np.sum(L**2)
if denominator == 0:
Q = 0.0
else:
Q = numerator / denominator
df = k - 1
return float(Q), int(df)
def effect_size_cochran(data: np.ndarray) -> float:
"""
Compute effect size for Cochran's Q (Kendall's W for binary data).
Parameters
----------
data : array, shape (n_subjects, n_conditions)
Binary data
Returns
-------
W : float
Effect size (0 to 1)
"""
n, k = data.shape
# Column sums
G = data.sum(axis=0)
# Mean column sum
G_mean = G.mean()
# Sum of squared deviations
S = np.sum((G - G_mean) ** 2)
# Kendall's W for binary data
W = S / (n * (k - 1) * k / 12)
# Bound between 0 and 1
W = min(max(W, 0.0), 1.0)
return float(W)
def interpret_effect_size(W: float) -> str:
"""Interpret Cochran's Q effect size."""
if W < 0.1:
return "negligible"
elif W < 0.3:
return "small"
elif W < 0.5:
return "medium"
else:
return "large"
[docs]
def test_cochran_q( # noqa: C901
data: Union[np.ndarray, pd.DataFrame],
subject_col: Optional[str] = None,
condition_col: Optional[str] = None,
value_col: Optional[str] = None,
condition_names: Optional[List[str]] = None,
alpha: float = 0.05,
plot: bool = False,
return_as: Literal["dict", "dataframe"] = "dict",
decimals: int = 3,
) -> Union[dict, pd.DataFrame, Tuple]:
r"""
Perform Cochran's Q test for binary repeated measures.
Extension of McNemar's test to 3+ conditions. Tests whether proportions
of successes differ across multiple related binary measurements.
Parameters
----------
data : array or DataFrame
- If array: shape (n_subjects, n_conditions), wide format with 0/1 values
- If DataFrame with subject_col/condition_col: long format
- If DataFrame without: wide format (rows=subjects, cols=conditions)
subject_col : str, optional
Column name for subject IDs (long format)
condition_col : str, optional
Column name for conditions (long format)
value_col : str, optional
Column name for binary values (long format)
condition_names : list of str, optional
Names for conditions (wide format)
alpha : float, default 0.05
Significance level
plot : bool, default False
Whether to generate visualization
return_as : {'dict', 'dataframe'}, default 'dict'
Output format
decimals : int, default 3
Number of decimal places for rounding
Returns
-------
result : dict or DataFrame
Test results including:
- statistic: Cochran's Q statistic
- pvalue: p-value
- df: Degrees of freedom (k - 1)
- effect_size: Kendall's W
- effect_size_interpretation: interpretation
- n_subjects: Number of subjects
- n_conditions: Number of conditions
- proportions: Success proportion for each condition
- n_successes: Number of successes per condition
- significant: Whether to reject null hypothesis
- stars: Significance stars
If plot=True, returns tuple of (result, figure)
Notes
-----
Cochran's Q test is used for repeated binary measurements (dichotomous data)
on the same subjects across 3+ conditions.
**Null Hypothesis (H0)**: Proportions of successes are equal across conditions
**Alternative Hypothesis (H1)**: At least one proportion differs
**Test Statistic**:
.. math::
Q = \\frac{(k-1)[k\\sum_{j=1}^{k}G_j^2 - N^2]}{k\\sum_{i=1}^{n}L_i - \\sum_{i=1}^{n}L_i^2} # noqa: D301
Where:
- k: Number of conditions
- n: Number of subjects
- G_j: Number of successes in condition j
- L_i: Number of successes for subject i (across conditions)
- N: Total number of successes
Q follows chi-square distribution with k-1 degrees of freedom.
**Effect Size (Kendall's W for binary)**:
.. math::
W = \\frac{\\sum_{j=1}^{k}(G_j - \\bar{G})^2}{n(k-1)k/12} # noqa: D301
Interpretation:
- W < 0.1: negligible
- W < 0.3: small
- W < 0.5: medium
- W ≥ 0.5: large
**Assumptions**:
- Binary outcomes (0/1, success/failure, yes/no)
- Repeated measurements on same subjects
- At least 3 conditions (for 2 conditions, use McNemar's test)
**Relation to other tests**:
- Extension of McNemar's test (2 conditions → 3+ conditions)
- Binary version of Friedman test
- Can use Friedman test on same data (Q ≈ Friedman χ²)
**Post-hoc tests**:
If significant:
- Pairwise McNemar tests
- Apply corrections: correct_bonferroni(), correct_holm()
**Advantages**:
- Appropriate for binary repeated measures
- No normality assumption
- Accounts for within-subject correlation
**Disadvantages**:
- Requires binary data
- Sensitive to subjects with all 0s or all 1s
- Less powerful than parametric alternatives if assumptions met
Examples
--------
>>> import numpy as np
>>> from scitex_stats.tests.categorical import test_cochran_q
>>>
>>> # Example: Treatment success (0=fail, 1=success) across 4 visits
>>> data = np.array([
... [0, 0, 1, 1], # Subject 1: improved over time
... [0, 1, 1, 1], # Subject 2: improved
... [0, 0, 0, 1], # Subject 3: late improvement
... [1, 1, 1, 1], # Subject 4: always success
... [0, 0, 1, 1], # Subject 5: improved
... ])
>>>
>>> result = test_cochran_q(
... data,
... condition_names=['Visit 1', 'Visit 2', 'Visit 3', 'Visit 4'],
... plot=True
... )
>>>
>>> print(f"Q = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
>>> print(f"Proportions: {result['proportions']}")
References
----------
.. [1] Cochran, W. G. (1950). "The comparison of percentages in matched
samples". Biometrika, 37(3/4), 256-266.
.. [2] McNemar, Q. (1947). "Note on the sampling error of the difference
between correlated proportions or percentages". Psychometrika,
12(2), 153-157.
See Also
--------
test_mcnemar : For 2 binary conditions
test_friedman : Non-parametric repeated measures (non-binary)
"""
# Convert data to wide format array
if isinstance(data, pd.DataFrame):
if (
subject_col is not None
and condition_col is not None
and value_col is not None
):
# Long format - pivot to wide
data_wide = data.pivot(
index=subject_col, columns=condition_col, values=value_col
)
data_array = data_wide.values
if condition_names is None:
condition_names = list(data_wide.columns)
else:
# Already wide format
data_array = data.values
if condition_names is None:
condition_names = list(data.columns)
else:
data_array = np.asarray(data)
if data_array.ndim != 2:
raise ValueError("Data must be 2D (subjects × conditions)")
n_subjects, n_conditions = data_array.shape
if n_conditions < 3:
raise ValueError(
"Cochran's Q requires at least 3 conditions. Use test_mcnemar for 2 conditions."
)
if n_subjects < 2:
raise ValueError("Need at least 2 subjects")
# Validate binary data
unique_vals = np.unique(data_array)
if not np.all(np.isin(unique_vals, [0, 1])):
raise ValueError("Data must be binary (0 or 1)")
if condition_names is None:
condition_names = [f"Condition {i + 1}" for i in range(n_conditions)]
# Compute Cochran's Q statistic
Q, df = cochran_q_statistic(data_array)
# p-value from chi-square distribution
pvalue = 1 - stats.chi2.cdf(Q, df)
# Compute proportions and counts
n_successes = data_array.sum(axis=0)
proportions = n_successes / n_subjects
# Compute effect size
W = effect_size_cochran(data_array)
W_interpretation = interpret_effect_size(W)
# Build result dictionary
result = {
"test": "Cochran's Q test",
"statistic": round(float(Q), decimals),
"pvalue": round(float(pvalue), decimals + 1),
"df": int(df),
"effect_size": round(float(W), decimals),
"effect_size_metric": "kendall_w_binary",
"effect_size_interpretation": W_interpretation,
"n_subjects": int(n_subjects),
"n_conditions": int(n_conditions),
"condition_names": condition_names,
"n_successes": [int(n) for n in n_successes],
"proportions": [round(float(p), decimals) for p in proportions],
"alpha": alpha,
"significant": pvalue < alpha,
"stars": p2stars(pvalue),
"H0": "Proportions of successes are equal across conditions",
}
# Generate plot if requested
fig = None
if plot and HAS_PLT:
fig = _plot_cochran_q(data_array, result, condition_names)
# Return based on format
if return_as == "dataframe":
result_df = pd.DataFrame([result])
if plot and fig is not None:
return result_df, fig
return result_df
else:
if plot and fig is not None:
return result, fig
return result
def _plot_cochran_q(data, result, condition_names):
"""Create visualization for Cochran's Q test."""
fig, axes = plt.subplots(1, 3)
n_subjects, n_conditions = data.shape
proportions = result["proportions"]
n_successes = result["n_successes"]
# Panel 1: Stacked bar chart (individual subjects)
ax = axes[0]
bottom = np.zeros(n_conditions)
for i in range(n_subjects):
ax.bar(
range(n_conditions),
data[i, :],
bottom=bottom,
)
bottom += data[i, :]
ax.set_xticks(range(n_conditions))
ax.set_xticklabels(condition_names, rotation=45, ha="right")
ax.set_xlabel("Condition")
ax.set_ylabel("Cumulative Successes")
ax.set_title("Individual Subject Patterns")
ax.set_ylim([0, n_subjects])
# Panel 2: Proportion bar chart
ax = axes[1]
bars = ax.bar(
range(n_conditions),
proportions,
)
# Add count labels
for i, (bar, count) in enumerate(zip(bars, n_successes)):
height = bar.get_height()
ax.text(
bar.get_x() + bar.get_width() / 2.0,
height,
f"{count}/{n_subjects}\n({proportions[i]:.1%})",
ha="center",
va="bottom",
)
ax.set_xticks(range(n_conditions))
ax.set_xticklabels(condition_names, rotation=45, ha="right")
ax.set_xlabel("Condition")
ax.set_ylabel("Proportion of Successes")
ax.set_title("Success Proportions")
ax.set_ylim([0, 1.0])
# Add overall mean line
mean_prop = np.mean(proportions)
ax.axhline(
y=mean_prop,
linestyle="--",
alpha=0.5,
label=f"Mean: {mean_prop:.2%}",
)
ax.legend()
# Panel 3: Cochran's Q Test stats
ax = axes[2]
ax.axis("off")
ax.set_title("Cochran's Q Test")
# Add stats text box
stars_text = result["stars"].replace("ns", "$n$s")
text_str = (
f"$Q$ = {result['statistic']:.3f} {stars_text}\n"
f"$p$ = {result['pvalue']:.4f}\n"
f"$k$ = {result['n_conditions']}"
)
ax.text(
0.5,
0.5,
text_str,
transform=ax.transAxes,
verticalalignment="center",
horizontalalignment="center",
color="black",
)
return fig
# Demo: python -m scitex_stats.tests.categorical._demo_cochran_q
# EOF