Source code for scitex_stats.tests.categorical._test_cochran_q

#!/usr/bin/env python3
# Timestamp: "2026-02-12 02:21:50 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_cochran_q.py

r"""Cochran's Q test for binary repeated measures.

Functionalities:
  - Perform Cochran's Q test for binary repeated measures
  - Extension of McNemar's test to 3+ conditions
  - Test for differences in success proportions across conditions
  - Generate proportion visualizations

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Binary data (subjects × conditions)
  - output: Test results (dict or DataFrame) and optional figure
"""

from typing import List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars

__FILE__ = __file__

logger = getLogger(__name__)

try:
    import matplotlib.pyplot as plt  # noqa: E402

    HAS_PLT = True
except ImportError:
    HAS_PLT = False


def cochran_q_statistic(data: np.ndarray) -> Tuple[float, int]:
    """
    Compute Cochran's Q statistic.

    Parameters
    ----------
    data : array, shape (n_subjects, n_conditions)
        Binary data (0/1)

    Returns
    -------
    Q : float
        Cochran's Q statistic
    df : int
        Degrees of freedom

    Notes
    -----
    Q follows chi-square distribution with k-1 degrees of freedom.
    """
    n, k = data.shape

    # Column sums (successes per condition)
    G = data.sum(axis=0)

    # Row sums (successes per subject)
    L = data.sum(axis=1)

    # Total successes
    N = data.sum()

    # Cochran's Q
    numerator = (k - 1) * (k * np.sum(G**2) - N**2)
    denominator = k * N - np.sum(L**2)

    if denominator == 0:
        Q = 0.0
    else:
        Q = numerator / denominator

    df = k - 1

    return float(Q), int(df)


def effect_size_cochran(data: np.ndarray) -> float:
    """
    Compute effect size for Cochran's Q (Kendall's W for binary data).

    Parameters
    ----------
    data : array, shape (n_subjects, n_conditions)
        Binary data

    Returns
    -------
    W : float
        Effect size (0 to 1)
    """
    n, k = data.shape

    # Column sums
    G = data.sum(axis=0)

    # Mean column sum
    G_mean = G.mean()

    # Sum of squared deviations
    S = np.sum((G - G_mean) ** 2)

    # Kendall's W for binary data
    W = S / (n * (k - 1) * k / 12)

    # Bound between 0 and 1
    W = min(max(W, 0.0), 1.0)

    return float(W)


def interpret_effect_size(W: float) -> str:
    """Interpret Cochran's Q effect size."""
    if W < 0.1:
        return "negligible"
    elif W < 0.3:
        return "small"
    elif W < 0.5:
        return "medium"
    else:
        return "large"



[docs]
def test_cochran_q(  # noqa: C901
    data: Union[np.ndarray, pd.DataFrame],
    subject_col: Optional[str] = None,
    condition_col: Optional[str] = None,
    value_col: Optional[str] = None,
    condition_names: Optional[List[str]] = None,
    alpha: float = 0.05,
    plot: bool = False,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
) -> Union[dict, pd.DataFrame, Tuple]:
    r"""
    Perform Cochran's Q test for binary repeated measures.

    Extension of McNemar's test to 3+ conditions. Tests whether proportions
    of successes differ across multiple related binary measurements.

    Parameters
    ----------
    data : array or DataFrame
        - If array: shape (n_subjects, n_conditions), wide format with 0/1 values
        - If DataFrame with subject_col/condition_col: long format
        - If DataFrame without: wide format (rows=subjects, cols=conditions)
    subject_col : str, optional
        Column name for subject IDs (long format)
    condition_col : str, optional
        Column name for conditions (long format)
    value_col : str, optional
        Column name for binary values (long format)
    condition_names : list of str, optional
        Names for conditions (wide format)
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format
    decimals : int, default 3
        Number of decimal places for rounding

    Returns
    -------
    result : dict or DataFrame
        Test results including:
        - statistic: Cochran's Q statistic
        - pvalue: p-value
        - df: Degrees of freedom (k - 1)
        - effect_size: Kendall's W
        - effect_size_interpretation: interpretation
        - n_subjects: Number of subjects
        - n_conditions: Number of conditions
        - proportions: Success proportion for each condition
        - n_successes: Number of successes per condition
        - significant: Whether to reject null hypothesis
        - stars: Significance stars

    If plot=True, returns tuple of (result, figure)

    Notes
    -----
    Cochran's Q test is used for repeated binary measurements (dichotomous data)
    on the same subjects across 3+ conditions.

    **Null Hypothesis (H0)**: Proportions of successes are equal across conditions

    **Alternative Hypothesis (H1)**: At least one proportion differs

    **Test Statistic**:

    .. math::
        Q = \\frac{(k-1)[k\\sum_{j=1}^{k}G_j^2 - N^2]}{k\\sum_{i=1}^{n}L_i - \\sum_{i=1}^{n}L_i^2}  # noqa: D301

    Where:
    - k: Number of conditions
    - n: Number of subjects
    - G_j: Number of successes in condition j
    - L_i: Number of successes for subject i (across conditions)
    - N: Total number of successes

    Q follows chi-square distribution with k-1 degrees of freedom.

    **Effect Size (Kendall's W for binary)**:

    .. math::
        W = \\frac{\\sum_{j=1}^{k}(G_j - \\bar{G})^2}{n(k-1)k/12}  # noqa: D301

    Interpretation:
    - W < 0.1: negligible
    - W < 0.3: small
    - W < 0.5: medium
    - W ≥ 0.5: large

    **Assumptions**:
    - Binary outcomes (0/1, success/failure, yes/no)
    - Repeated measurements on same subjects
    - At least 3 conditions (for 2 conditions, use McNemar's test)

    **Relation to other tests**:
    - Extension of McNemar's test (2 conditions → 3+ conditions)
    - Binary version of Friedman test
    - Can use Friedman test on same data (Q ≈ Friedman χ²)

    **Post-hoc tests**:
    If significant:
    - Pairwise McNemar tests
    - Apply corrections: correct_bonferroni(), correct_holm()

    **Advantages**:
    - Appropriate for binary repeated measures
    - No normality assumption
    - Accounts for within-subject correlation

    **Disadvantages**:
    - Requires binary data
    - Sensitive to subjects with all 0s or all 1s
    - Less powerful than parametric alternatives if assumptions met

    Examples
    --------
    >>> import numpy as np
    >>> from scitex_stats.tests.categorical import test_cochran_q
    >>>
    >>> # Example: Treatment success (0=fail, 1=success) across 4 visits
    >>> data = np.array([
    ...     [0, 0, 1, 1],  # Subject 1: improved over time
    ...     [0, 1, 1, 1],  # Subject 2: improved
    ...     [0, 0, 0, 1],  # Subject 3: late improvement
    ...     [1, 1, 1, 1],  # Subject 4: always success
    ...     [0, 0, 1, 1],  # Subject 5: improved
    ... ])
    >>>
    >>> result = test_cochran_q(
    ...     data,
    ...     condition_names=['Visit 1', 'Visit 2', 'Visit 3', 'Visit 4'],
    ...     plot=True
    ... )
    >>>
    >>> print(f"Q = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
    >>> print(f"Proportions: {result['proportions']}")

    References
    ----------
    .. [1] Cochran, W. G. (1950). "The comparison of percentages in matched
           samples". Biometrika, 37(3/4), 256-266.
    .. [2] McNemar, Q. (1947). "Note on the sampling error of the difference
           between correlated proportions or percentages". Psychometrika,
           12(2), 153-157.

    See Also
    --------
    test_mcnemar : For 2 binary conditions
    test_friedman : Non-parametric repeated measures (non-binary)
    """
    # Convert data to wide format array
    if isinstance(data, pd.DataFrame):
        if (
            subject_col is not None
            and condition_col is not None
            and value_col is not None
        ):
            # Long format - pivot to wide
            data_wide = data.pivot(
                index=subject_col, columns=condition_col, values=value_col
            )
            data_array = data_wide.values
            if condition_names is None:
                condition_names = list(data_wide.columns)
        else:
            # Already wide format
            data_array = data.values
            if condition_names is None:
                condition_names = list(data.columns)
    else:
        data_array = np.asarray(data)
        if data_array.ndim != 2:
            raise ValueError("Data must be 2D (subjects × conditions)")

    n_subjects, n_conditions = data_array.shape

    if n_conditions < 3:
        raise ValueError(
            "Cochran's Q requires at least 3 conditions. Use test_mcnemar for 2 conditions."
        )

    if n_subjects < 2:
        raise ValueError("Need at least 2 subjects")

    # Validate binary data
    unique_vals = np.unique(data_array)
    if not np.all(np.isin(unique_vals, [0, 1])):
        raise ValueError("Data must be binary (0 or 1)")

    if condition_names is None:
        condition_names = [f"Condition {i + 1}" for i in range(n_conditions)]

    # Compute Cochran's Q statistic
    Q, df = cochran_q_statistic(data_array)

    # p-value from chi-square distribution
    pvalue = 1 - stats.chi2.cdf(Q, df)

    # Compute proportions and counts
    n_successes = data_array.sum(axis=0)
    proportions = n_successes / n_subjects

    # Compute effect size
    W = effect_size_cochran(data_array)
    W_interpretation = interpret_effect_size(W)

    # Build result dictionary
    result = {
        "test": "Cochran's Q test",
        "statistic": round(float(Q), decimals),
        "pvalue": round(float(pvalue), decimals + 1),
        "df": int(df),
        "effect_size": round(float(W), decimals),
        "effect_size_metric": "kendall_w_binary",
        "effect_size_interpretation": W_interpretation,
        "n_subjects": int(n_subjects),
        "n_conditions": int(n_conditions),
        "condition_names": condition_names,
        "n_successes": [int(n) for n in n_successes],
        "proportions": [round(float(p), decimals) for p in proportions],
        "alpha": alpha,
        "significant": pvalue < alpha,
        "stars": p2stars(pvalue),
        "H0": "Proportions of successes are equal across conditions",
    }

    # Generate plot if requested
    fig = None
    if plot and HAS_PLT:
        fig = _plot_cochran_q(data_array, result, condition_names)

    # Return based on format
    if return_as == "dataframe":
        result_df = pd.DataFrame([result])
        if plot and fig is not None:
            return result_df, fig
        return result_df
    else:
        if plot and fig is not None:
            return result, fig
        return result



def _plot_cochran_q(data, result, condition_names):
    """Create visualization for Cochran's Q test."""
    fig, axes = plt.subplots(1, 3)

    n_subjects, n_conditions = data.shape
    proportions = result["proportions"]
    n_successes = result["n_successes"]

    # Panel 1: Stacked bar chart (individual subjects)
    ax = axes[0]
    bottom = np.zeros(n_conditions)

    for i in range(n_subjects):
        ax.bar(
            range(n_conditions),
            data[i, :],
            bottom=bottom,
        )
        bottom += data[i, :]

    ax.set_xticks(range(n_conditions))
    ax.set_xticklabels(condition_names, rotation=45, ha="right")
    ax.set_xlabel("Condition")
    ax.set_ylabel("Cumulative Successes")
    ax.set_title("Individual Subject Patterns")
    ax.set_ylim([0, n_subjects])

    # Panel 2: Proportion bar chart
    ax = axes[1]
    bars = ax.bar(
        range(n_conditions),
        proportions,
    )

    # Add count labels
    for i, (bar, count) in enumerate(zip(bars, n_successes)):
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height,
            f"{count}/{n_subjects}\n({proportions[i]:.1%})",
            ha="center",
            va="bottom",
        )

    ax.set_xticks(range(n_conditions))
    ax.set_xticklabels(condition_names, rotation=45, ha="right")
    ax.set_xlabel("Condition")
    ax.set_ylabel("Proportion of Successes")
    ax.set_title("Success Proportions")
    ax.set_ylim([0, 1.0])

    # Add overall mean line
    mean_prop = np.mean(proportions)
    ax.axhline(
        y=mean_prop,
        linestyle="--",
        alpha=0.5,
        label=f"Mean: {mean_prop:.2%}",
    )
    ax.legend()

    # Panel 3: Cochran's Q Test stats
    ax = axes[2]
    ax.axis("off")
    ax.set_title("Cochran's Q Test")

    # Add stats text box
    stars_text = result["stars"].replace("ns", "$n$s")
    text_str = (
        f"$Q$ = {result['statistic']:.3f} {stars_text}\n"
        f"$p$ = {result['pvalue']:.4f}\n"
        f"$k$ = {result['n_conditions']}"
    )
    ax.text(
        0.5,
        0.5,
        text_str,
        transform=ax.transAxes,
        verticalalignment="center",
        horizontalalignment="center",
        color="black",
    )

    return fig


# Demo: python -m scitex_stats.tests.categorical._demo_cochran_q

# EOF