Source code for scitex_stats.tests.categorical._test_cochran_q

#!/usr/bin/env python3
# Timestamp: "2026-02-12 02:21:50 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_cochran_q.py

r"""Cochran's Q test for binary repeated measures.

Functionalities:
  - Perform Cochran's Q test for binary repeated measures
  - Extension of McNemar's test to 3+ conditions
  - Test for differences in success proportions across conditions
  - Generate proportion visualizations

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Binary data (subjects × conditions)
  - output: Test results (dict or DataFrame) and optional figure
"""

from typing import List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars

__FILE__ = __file__

logger = getLogger(__name__)

try:
    import matplotlib.pyplot as plt  # noqa: E402

    HAS_PLT = True
except ImportError:
    HAS_PLT = False


def cochran_q_statistic(data: np.ndarray) -> Tuple[float, int]:
    """
    Compute Cochran's Q statistic.

    Parameters
    ----------
    data : array, shape (n_subjects, n_conditions)
        Binary data (0/1)

    Returns
    -------
    Q : float
        Cochran's Q statistic
    df : int
        Degrees of freedom

    Notes
    -----
    Q follows chi-square distribution with k-1 degrees of freedom.
    """
    n, k = data.shape

    # Column sums (successes per condition)
    G = data.sum(axis=0)

    # Row sums (successes per subject)
    L = data.sum(axis=1)

    # Total successes
    N = data.sum()

    # Cochran's Q
    numerator = (k - 1) * (k * np.sum(G**2) - N**2)
    denominator = k * N - np.sum(L**2)

    if denominator == 0:
        Q = 0.0
    else:
        Q = numerator / denominator

    df = k - 1

    return float(Q), int(df)


def effect_size_cochran(data: np.ndarray) -> float:
    """
    Compute effect size for Cochran's Q (Kendall's W for binary data).

    Parameters
    ----------
    data : array, shape (n_subjects, n_conditions)
        Binary data

    Returns
    -------
    W : float
        Effect size (0 to 1)
    """
    n, k = data.shape

    # Column sums
    G = data.sum(axis=0)

    # Mean column sum
    G_mean = G.mean()

    # Sum of squared deviations
    S = np.sum((G - G_mean) ** 2)

    # Kendall's W for binary data
    W = S / (n * (k - 1) * k / 12)

    # Bound between 0 and 1
    W = min(max(W, 0.0), 1.0)

    return float(W)


def interpret_effect_size(W: float) -> str:
    """Interpret Cochran's Q effect size."""
    if W < 0.1:
        return "negligible"
    elif W < 0.3:
        return "small"
    elif W < 0.5:
        return "medium"
    else:
        return "large"


[docs] def test_cochran_q( # noqa: C901 data: Union[np.ndarray, pd.DataFrame], subject_col: Optional[str] = None, condition_col: Optional[str] = None, value_col: Optional[str] = None, condition_names: Optional[List[str]] = None, alpha: float = 0.05, plot: bool = False, return_as: Literal["dict", "dataframe"] = "dict", decimals: int = 3, ) -> Union[dict, pd.DataFrame, Tuple]: r""" Perform Cochran's Q test for binary repeated measures. Extension of McNemar's test to 3+ conditions. Tests whether proportions of successes differ across multiple related binary measurements. Parameters ---------- data : array or DataFrame - If array: shape (n_subjects, n_conditions), wide format with 0/1 values - If DataFrame with subject_col/condition_col: long format - If DataFrame without: wide format (rows=subjects, cols=conditions) subject_col : str, optional Column name for subject IDs (long format) condition_col : str, optional Column name for conditions (long format) value_col : str, optional Column name for binary values (long format) condition_names : list of str, optional Names for conditions (wide format) alpha : float, default 0.05 Significance level plot : bool, default False Whether to generate visualization return_as : {'dict', 'dataframe'}, default 'dict' Output format decimals : int, default 3 Number of decimal places for rounding Returns ------- result : dict or DataFrame Test results including: - statistic: Cochran's Q statistic - pvalue: p-value - df: Degrees of freedom (k - 1) - effect_size: Kendall's W - effect_size_interpretation: interpretation - n_subjects: Number of subjects - n_conditions: Number of conditions - proportions: Success proportion for each condition - n_successes: Number of successes per condition - significant: Whether to reject null hypothesis - stars: Significance stars If plot=True, returns tuple of (result, figure) Notes ----- Cochran's Q test is used for repeated binary measurements (dichotomous data) on the same subjects across 3+ conditions. **Null Hypothesis (H0)**: Proportions of successes are equal across conditions **Alternative Hypothesis (H1)**: At least one proportion differs **Test Statistic**: .. math:: Q = \\frac{(k-1)[k\\sum_{j=1}^{k}G_j^2 - N^2]}{k\\sum_{i=1}^{n}L_i - \\sum_{i=1}^{n}L_i^2} # noqa: D301 Where: - k: Number of conditions - n: Number of subjects - G_j: Number of successes in condition j - L_i: Number of successes for subject i (across conditions) - N: Total number of successes Q follows chi-square distribution with k-1 degrees of freedom. **Effect Size (Kendall's W for binary)**: .. math:: W = \\frac{\\sum_{j=1}^{k}(G_j - \\bar{G})^2}{n(k-1)k/12} # noqa: D301 Interpretation: - W < 0.1: negligible - W < 0.3: small - W < 0.5: medium - W ≥ 0.5: large **Assumptions**: - Binary outcomes (0/1, success/failure, yes/no) - Repeated measurements on same subjects - At least 3 conditions (for 2 conditions, use McNemar's test) **Relation to other tests**: - Extension of McNemar's test (2 conditions → 3+ conditions) - Binary version of Friedman test - Can use Friedman test on same data (Q ≈ Friedman χ²) **Post-hoc tests**: If significant: - Pairwise McNemar tests - Apply corrections: correct_bonferroni(), correct_holm() **Advantages**: - Appropriate for binary repeated measures - No normality assumption - Accounts for within-subject correlation **Disadvantages**: - Requires binary data - Sensitive to subjects with all 0s or all 1s - Less powerful than parametric alternatives if assumptions met Examples -------- >>> import numpy as np >>> from scitex_stats.tests.categorical import test_cochran_q >>> >>> # Example: Treatment success (0=fail, 1=success) across 4 visits >>> data = np.array([ ... [0, 0, 1, 1], # Subject 1: improved over time ... [0, 1, 1, 1], # Subject 2: improved ... [0, 0, 0, 1], # Subject 3: late improvement ... [1, 1, 1, 1], # Subject 4: always success ... [0, 0, 1, 1], # Subject 5: improved ... ]) >>> >>> result = test_cochran_q( ... data, ... condition_names=['Visit 1', 'Visit 2', 'Visit 3', 'Visit 4'], ... plot=True ... ) >>> >>> print(f"Q = {result['statistic']:.2f}, p = {result['pvalue']:.4f}") >>> print(f"Proportions: {result['proportions']}") References ---------- .. [1] Cochran, W. G. (1950). "The comparison of percentages in matched samples". Biometrika, 37(3/4), 256-266. .. [2] McNemar, Q. (1947). "Note on the sampling error of the difference between correlated proportions or percentages". Psychometrika, 12(2), 153-157. See Also -------- test_mcnemar : For 2 binary conditions test_friedman : Non-parametric repeated measures (non-binary) """ # Convert data to wide format array if isinstance(data, pd.DataFrame): if ( subject_col is not None and condition_col is not None and value_col is not None ): # Long format - pivot to wide data_wide = data.pivot( index=subject_col, columns=condition_col, values=value_col ) data_array = data_wide.values if condition_names is None: condition_names = list(data_wide.columns) else: # Already wide format data_array = data.values if condition_names is None: condition_names = list(data.columns) else: data_array = np.asarray(data) if data_array.ndim != 2: raise ValueError("Data must be 2D (subjects × conditions)") n_subjects, n_conditions = data_array.shape if n_conditions < 3: raise ValueError( "Cochran's Q requires at least 3 conditions. Use test_mcnemar for 2 conditions." ) if n_subjects < 2: raise ValueError("Need at least 2 subjects") # Validate binary data unique_vals = np.unique(data_array) if not np.all(np.isin(unique_vals, [0, 1])): raise ValueError("Data must be binary (0 or 1)") if condition_names is None: condition_names = [f"Condition {i + 1}" for i in range(n_conditions)] # Compute Cochran's Q statistic Q, df = cochran_q_statistic(data_array) # p-value from chi-square distribution pvalue = 1 - stats.chi2.cdf(Q, df) # Compute proportions and counts n_successes = data_array.sum(axis=0) proportions = n_successes / n_subjects # Compute effect size W = effect_size_cochran(data_array) W_interpretation = interpret_effect_size(W) # Build result dictionary result = { "test": "Cochran's Q test", "statistic": round(float(Q), decimals), "pvalue": round(float(pvalue), decimals + 1), "df": int(df), "effect_size": round(float(W), decimals), "effect_size_metric": "kendall_w_binary", "effect_size_interpretation": W_interpretation, "n_subjects": int(n_subjects), "n_conditions": int(n_conditions), "condition_names": condition_names, "n_successes": [int(n) for n in n_successes], "proportions": [round(float(p), decimals) for p in proportions], "alpha": alpha, "significant": pvalue < alpha, "stars": p2stars(pvalue), "H0": "Proportions of successes are equal across conditions", } # Generate plot if requested fig = None if plot and HAS_PLT: fig = _plot_cochran_q(data_array, result, condition_names) # Return based on format if return_as == "dataframe": result_df = pd.DataFrame([result]) if plot and fig is not None: return result_df, fig return result_df else: if plot and fig is not None: return result, fig return result
def _plot_cochran_q(data, result, condition_names): """Create visualization for Cochran's Q test.""" fig, axes = plt.subplots(1, 3) n_subjects, n_conditions = data.shape proportions = result["proportions"] n_successes = result["n_successes"] # Panel 1: Stacked bar chart (individual subjects) ax = axes[0] bottom = np.zeros(n_conditions) for i in range(n_subjects): ax.bar( range(n_conditions), data[i, :], bottom=bottom, ) bottom += data[i, :] ax.set_xticks(range(n_conditions)) ax.set_xticklabels(condition_names, rotation=45, ha="right") ax.set_xlabel("Condition") ax.set_ylabel("Cumulative Successes") ax.set_title("Individual Subject Patterns") ax.set_ylim([0, n_subjects]) # Panel 2: Proportion bar chart ax = axes[1] bars = ax.bar( range(n_conditions), proportions, ) # Add count labels for i, (bar, count) in enumerate(zip(bars, n_successes)): height = bar.get_height() ax.text( bar.get_x() + bar.get_width() / 2.0, height, f"{count}/{n_subjects}\n({proportions[i]:.1%})", ha="center", va="bottom", ) ax.set_xticks(range(n_conditions)) ax.set_xticklabels(condition_names, rotation=45, ha="right") ax.set_xlabel("Condition") ax.set_ylabel("Proportion of Successes") ax.set_title("Success Proportions") ax.set_ylim([0, 1.0]) # Add overall mean line mean_prop = np.mean(proportions) ax.axhline( y=mean_prop, linestyle="--", alpha=0.5, label=f"Mean: {mean_prop:.2%}", ) ax.legend() # Panel 3: Cochran's Q Test stats ax = axes[2] ax.axis("off") ax.set_title("Cochran's Q Test") # Add stats text box stars_text = result["stars"].replace("ns", "$n$s") text_str = ( f"$Q$ = {result['statistic']:.3f} {stars_text}\n" f"$p$ = {result['pvalue']:.4f}\n" f"$k$ = {result['n_conditions']}" ) ax.text( 0.5, 0.5, text_str, transform=ax.transAxes, verticalalignment="center", horizontalalignment="center", color="black", ) return fig # Demo: python -m scitex_stats.tests.categorical._demo_cochran_q # EOF