Source code for sctrial.stats.power

"""Power analysis utilities for trial planning.

This module provides sample size and power calculations for both
**two-arm DiD** and **single-arm paired** (pre/post) designs in
single-cell studies.

Mathematical Background
-----------------------
**Two-arm DiD** (treatment × time interaction):

    Power = Φ(|δ|/SE - z_{1-α/2}),   SE = σ × √(4/n)

    n = 4σ²(z_{1-α/2} + z_{1-β})² / δ²

**Single-arm paired** (within-arm pre→post change):

    Power = Φ(|δ|/SE - z_{1-α/2}),   SE = σ × √(2/n)

    n = 2σ²(z_{1-α/2} + z_{1-β})² / δ²

The paired design has half the variance of DiD (factor 2 vs 4)
because it involves one arm instead of two.

For cluster-robust inference with ICC (intraclass correlation):

    Design effect = 1 + (m-1)×ICC

where m = average cluster size.

The effective sample size is:
    n_eff = n / design_effect

Key Considerations for Single-Cell Studies
-------------------------------------------
1. **Unit of analysis**: Participants, not cells. Power is driven by the
   number of participants, not cells per participant.

2. **ICC matters**: High within-participant correlation (cells from same donor
   are similar) inflates variance estimates. Account for this in power calculations.

3. **Cell-type stratification**: Testing multiple cell types increases multiple
   testing burden but may reveal cell-type-specific effects.

4. **Effect size uncertainty**: Use pilot data or published studies to estimate
   expected effect sizes. Be conservative.
"""

from __future__ import annotations

import numpy as np
from scipy import stats

__all__ = [
    "power_did",
    "sample_size_did",
    "power_curve",
    "power_paired",
    "sample_size_paired",
    "sensitivity_paired",
    "design_effect",
    "effective_sample_size",
]



[docs]
def design_effect(
    cluster_size: float,
    icc: float,
) -> float:
    """Calculate the design effect for clustered data.

    The design effect (DEFF) quantifies how much clustering inflates
    variance compared to simple random sampling:

        DEFF = 1 + (m - 1) × ICC

    where m is the average cluster size and ICC is the intraclass
    correlation coefficient.

    Parameters
    ----------
    cluster_size
        Average number of observations per cluster (e.g., cells per participant).
    icc
        Intraclass correlation coefficient (0 to 1).
        - ICC ≈ 0: observations within clusters are independent
        - ICC ≈ 1: observations within clusters are identical

    Returns
    -------
    float
        Design effect. Values > 1 indicate variance inflation.

    Examples
    --------
    >>> # 100 cells per participant, ICC=0.05
    >>> deff = design_effect(100, 0.05)
    >>> print(f"Design effect: {deff:.1f}")
    Design effect: 5.95
    """
    if not (0 <= icc <= 1):
        raise ValueError("icc must be between 0 and 1.")
    if not np.isfinite(cluster_size) or cluster_size <= 0:
        raise ValueError("cluster_size must be > 0.")
    return 1 + (cluster_size - 1) * icc




[docs]
def effective_sample_size(
    n_clusters: int,
    cluster_size: float,
    icc: float,
) -> float:
    """Calculate the effective sample size accounting for clustering.

    The effective sample size is the equivalent number of independent
    observations after accounting for within-cluster correlation:

        n_eff = (n_clusters × cluster_size) / DEFF

    Parameters
    ----------
    n_clusters
        Number of clusters (e.g., participants).
    cluster_size
        Average observations per cluster.
    icc
        Intraclass correlation coefficient.

    Returns
    -------
    float
        Effective sample size.

    Notes
    -----
    For single-cell studies, the effective sample size is typically
    close to the number of participants, not the number of cells,
    when aggregating to participant level.
    """
    if not np.isfinite(n_clusters) or n_clusters <= 0:
        raise ValueError("n_clusters must be > 0.")
    deff = design_effect(cluster_size, icc)
    n_total = n_clusters * cluster_size
    return n_total / deff




[docs]
def power_did(
    n_per_group: int,
    effect_size: float,
    sigma: float = 1.0,
    alpha: float = 0.05,
    two_sided: bool = True,
    icc: float = 0.0,
    cluster_size: float = 1.0,
) -> float:
    """Calculate statistical power for a DiD analysis.

    This function computes the power to detect a given DiD effect
    (treatment × time interaction) with the specified sample size.

    Model:
        Y_it = α + β₁×Treat + β₂×Post + β₃×(Treat×Post) + ε

    H₀: β₃ = 0  (no differential change)
    H₁: β₃ ≠ 0  (treatment causes differential change)

    Parameters
    ----------
    n_per_group
        Number of participants per arm (assumes balanced design).
        Total participants = 2 × n_per_group.
    effect_size
        Expected DiD effect size (β₃) in original units.
        Can also be Cohen's d if sigma=1.
    sigma
        Within-group standard deviation of the outcome.
    alpha
        Significance level (default 0.05).
    two_sided
        If True (default), use two-sided test.
    icc
        Intraclass correlation for cluster adjustment.
    cluster_size
        Average cluster size (e.g., cells per participant).

    Returns
    -------
    float
        Statistical power (probability of rejecting H₀ when H₁ is true).

    Examples
    --------
    >>> # 20 participants per arm, effect = 0.5 SD
    >>> pwr = power_did(n_per_group=20, effect_size=0.5, sigma=1.0)
    >>> print(f"Power: {pwr:.1%}")
    Power: 69.8%

    >>> # With clustering (100 cells/participant, ICC=0.05)
    >>> pwr = power_did(20, 0.5, icc=0.05, cluster_size=100)
    >>> print(f"Power with clustering: {pwr:.1%}")

    Notes
    -----
    For DiD, the variance of the interaction term is approximately:

        Var(β₃) = 4σ² / n

    where n is the total number of participants (not cells).
    """
    if not np.isfinite(n_per_group) or n_per_group < 1:
        raise ValueError("n_per_group must be a finite integer >= 1.")
    if int(n_per_group) != n_per_group:
        raise ValueError("n_per_group must be an integer.")
    if not np.isfinite(effect_size):
        raise ValueError("effect_size must be finite.")
    if not np.isfinite(sigma) or sigma <= 0:
        raise ValueError("sigma must be a finite value > 0.")
    if not (0 < alpha < 1):
        raise ValueError("alpha must be between 0 and 1.")
    if not (0 <= icc <= 1):
        raise ValueError("icc must be between 0 and 1.")
    if not np.isfinite(cluster_size) or cluster_size < 1:
        raise ValueError("cluster_size must be >= 1.")
    if n_per_group < 2:
        return 0.0

    # Total participants
    n_total = 2 * n_per_group

    # Apply design effect if clustering
    if icc > 0 and cluster_size > 1:
        deff = design_effect(cluster_size, icc)
        n_eff = n_total / deff
    else:
        n_eff = n_total

    # Standard error of DiD coefficient
    # For balanced 2x2 DiD: SE = σ × √(4/n)
    se = sigma * np.sqrt(4 / n_eff)

    # Noncentrality parameter
    ncp = abs(effect_size) / se

    # Critical value
    if two_sided:
        z_crit = stats.norm.ppf(1 - alpha / 2)
    else:
        z_crit = stats.norm.ppf(1 - alpha)

    # Power = P(Z > z_crit - ncp) for one-sided
    # For two-sided, use both tails
    if two_sided:
        power = stats.norm.sf(z_crit - ncp) + stats.norm.cdf(-z_crit - ncp)
    else:
        power = stats.norm.sf(z_crit - ncp)

    return float(power)




[docs]
def sample_size_did(
    effect_size: float,
    sigma: float = 1.0,
    power: float = 0.80,
    alpha: float = 0.05,
    two_sided: bool = True,
    icc: float = 0.0,
    cluster_size: float = 1.0,
) -> int:
    """Calculate required sample size for a DiD analysis.

    Determines the number of participants per arm needed to achieve
    the desired power for detecting a given effect size.

    Parameters
    ----------
    effect_size
        Minimum detectable DiD effect size.
    sigma
        Expected within-group standard deviation.
    power
        Desired statistical power (default 0.80).
    alpha
        Significance level (default 0.05).
    two_sided
        If True (default), use two-sided test.
    icc
        Intraclass correlation for cluster adjustment.
    cluster_size
        Average cluster size.

    Returns
    -------
    int
        Required participants per arm (total = 2 × this value).

    Examples
    --------
    >>> # Sample size to detect d=0.5 with 80% power
    >>> n = sample_size_did(effect_size=0.5, sigma=1.0, power=0.80)
    >>> print(f"Need {n} participants per arm ({2*n} total)")

    >>> # With multiple testing correction (Bonferroni for 10 tests)
    >>> n = sample_size_did(effect_size=0.5, alpha=0.05/10, power=0.80)
    >>> print(f"With Bonferroni: {n} per arm")
    """
    if not np.isfinite(effect_size) or effect_size == 0:
        raise ValueError("effect_size must be non-zero for sample size calculation.")
    if effect_size < 0:
        raise ValueError("effect_size must be > 0.")
    if not np.isfinite(sigma) or sigma <= 0:
        raise ValueError("sigma must be a finite value > 0.")
    if not (0 < power < 1):
        raise ValueError("power must be between 0 and 1.")
    if not (0 < alpha < 1):
        raise ValueError("alpha must be between 0 and 1.")
    if not (0 <= icc <= 1):
        raise ValueError("icc must be between 0 and 1.")
    if not np.isfinite(cluster_size) or cluster_size < 1:
        raise ValueError("cluster_size must be >= 1.")

    # Critical values
    if two_sided:
        z_alpha = stats.norm.ppf(1 - alpha / 2)
    else:
        z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(power)

    # Base sample size formula for DiD
    # n = 4σ²(z_α + z_β)² / δ²
    n_base = 4 * (sigma**2) * ((z_alpha + z_beta) ** 2) / (effect_size**2)

    # Apply design effect
    if icc > 0 and cluster_size > 1:
        deff = design_effect(cluster_size, icc)
        n_required = n_base * deff
    else:
        n_required = n_base

    # Participants per arm (divide total by 2)
    n_per_arm = int(np.ceil(n_required / 2))

    return max(n_per_arm, 2)  # Minimum 2 per arm




[docs]
def power_curve(
    n_range: tuple[int, int] = (5, 50),
    effect_size: float = 0.5,
    sigma: float = 1.0,
    alpha: float = 0.05,
    icc: float = 0.0,
    cluster_size: float = 1.0,
    n_points: int = 20,
) -> tuple[np.ndarray, np.ndarray]:
    """Generate a power curve across sample sizes.

    Parameters
    ----------
    n_range
        (min, max) participants per arm.
    effect_size
        Expected DiD effect size.
    sigma
        Within-group standard deviation.
    alpha
        Significance level.
    icc
        Intraclass correlation.
    cluster_size
        Average cluster size.
    n_points
        Number of points to compute.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        (sample_sizes, powers) arrays.

    Examples
    --------
    >>> ns, powers = power_curve(n_range=(10, 100), effect_size=0.3)
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(ns, powers)
    >>> plt.axhline(0.8, linestyle='--', label='80% power')
    >>> plt.xlabel('Participants per arm')
    >>> plt.ylabel('Power')
    >>> plt.show()
    """
    ns = np.linspace(n_range[0], n_range[1], n_points).astype(int)
    powers = np.array(
        [power_did(n, effect_size, sigma, alpha, icc=icc, cluster_size=cluster_size) for n in ns]
    )
    return ns, powers



def sensitivity_analysis(
    n_per_group: int,
    sigma: float = 1.0,
    power: float = 0.80,
    alpha: float = 0.05,
    two_sided: bool = True,
) -> float:
    """Calculate the minimum detectable effect size for given power.

    This is the inverse of sample_size_did: given a fixed sample size,
    what is the smallest effect that can be detected?

    Parameters
    ----------
    n_per_group
        Number of participants per arm.
    sigma
        Within-group standard deviation.
    power
        Desired power.
    alpha
        Significance level.
    two_sided
        Use two-sided test.

    Returns
    -------
    float
        Minimum detectable effect size.

    Examples
    --------
    >>> # What effect can we detect with 15 per arm?
    >>> mde = sensitivity_analysis(n_per_group=15, power=0.80)
    >>> print(f"Minimum detectable effect: {mde:.2f} SD")
    """
    if n_per_group < 2:
        return np.inf

    n_total = 2 * n_per_group

    if two_sided:
        z_alpha = stats.norm.ppf(1 - alpha / 2)
    else:
        z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(power)

    # Solve for effect size: δ = σ × √(4/n) × (z_α + z_β)
    se_unit = sigma * np.sqrt(4 / n_total)
    mde = se_unit * (z_alpha + z_beta)

    return mde


# ======================================================================
# Paired (single-arm pre/post) power functions
# ======================================================================



[docs]
def power_paired(
    n_participants: int,
    effect_size: float,
    sigma: float = 1.0,
    alpha: float = 0.05,
    two_sided: bool = True,
) -> float:
    """Calculate statistical power for a paired pre/post comparison.

    This function computes the power to detect a given within-arm
    longitudinal effect (post − pre) with the specified sample size.

    Model:
        Y_post - Y_pre = δ + ε   (paired differences)

    H₀: δ = 0  (no change)
    H₁: δ ≠ 0  (treatment causes change)

    Parameters
    ----------
    n_participants
        Number of participants with paired measurements.
    effect_size
        Expected pre-to-post effect size (δ) in original units.
        Can also be Cohen's d if sigma=1.
    sigma
        Within-group standard deviation of the outcome (not the
        paired difference SD; the paired difference variance is 2σ²).
    alpha
        Significance level (default 0.05).
    two_sided
        If True (default), use two-sided test.

    Returns
    -------
    float
        Statistical power (probability of rejecting H₀ when H₁ is true).

    Examples
    --------
    >>> # 6 participants, effect = 0.8 SD
    >>> pwr = power_paired(n_participants=6, effect_size=0.8, sigma=1.0)
    >>> print(f"Power: {pwr:.1%}")

    Notes
    -----
    For paired pre/post, the variance of the time coefficient is:

        Var(β_time) = 2σ² / n

    where n is the number of paired participants.  This is half the
    variance of a DiD coefficient (4σ²/n) because there is only one
    arm instead of two.
    """
    if not np.isfinite(n_participants) or n_participants < 1:
        raise ValueError("n_participants must be a finite integer >= 1.")
    if int(n_participants) != n_participants:
        raise ValueError("n_participants must be an integer.")
    if not np.isfinite(effect_size):
        raise ValueError("effect_size must be finite.")
    if not np.isfinite(sigma) or sigma <= 0:
        raise ValueError("sigma must be a finite value > 0.")
    if not (0 < alpha < 1):
        raise ValueError("alpha must be between 0 and 1.")
    if n_participants < 2:
        return 0.0

    # Standard error for paired comparison: SE = σ × √(2/n)
    se = sigma * np.sqrt(2.0 / n_participants)

    # Noncentrality parameter
    ncp = abs(effect_size) / se

    # Critical value
    if two_sided:
        z_crit = stats.norm.ppf(1 - alpha / 2)
    else:
        z_crit = stats.norm.ppf(1 - alpha)

    # Power
    if two_sided:
        power = stats.norm.sf(z_crit - ncp) + stats.norm.cdf(-z_crit - ncp)
    else:
        power = stats.norm.sf(z_crit - ncp)

    return float(power)




[docs]
def sample_size_paired(
    effect_size: float,
    sigma: float = 1.0,
    power: float = 0.80,
    alpha: float = 0.05,
    two_sided: bool = True,
) -> int:
    """Calculate required sample size for a paired pre/post comparison.

    Determines the number of participants with paired measurements
    needed to achieve the desired power.

    Parameters
    ----------
    effect_size
        Minimum detectable effect size (must be > 0).
    sigma
        Expected within-group standard deviation.
    power
        Desired statistical power (default 0.80).
    alpha
        Significance level (default 0.05).
    two_sided
        If True (default), use two-sided test.

    Returns
    -------
    int
        Required number of paired participants.

    Examples
    --------
    >>> n = sample_size_paired(effect_size=0.5, sigma=1.0, power=0.80)
    >>> print(f"Need {n} paired participants")
    """
    if not np.isfinite(effect_size) or effect_size <= 0:
        raise ValueError("effect_size must be a finite value > 0.")
    if not np.isfinite(sigma) or sigma <= 0:
        raise ValueError("sigma must be a finite value > 0.")
    if not (0 < power < 1):
        raise ValueError("power must be between 0 and 1.")
    if not (0 < alpha < 1):
        raise ValueError("alpha must be between 0 and 1.")

    if two_sided:
        z_alpha = stats.norm.ppf(1 - alpha / 2)
    else:
        z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(power)

    # n = 2σ²(z_α + z_β)² / δ²
    n = 2.0 * sigma**2 * (z_alpha + z_beta) ** 2 / effect_size**2

    return max(int(np.ceil(n)), 2)




[docs]
def sensitivity_paired(
    n_participants: int,
    sigma: float = 1.0,
    power: float = 0.80,
    alpha: float = 0.05,
    two_sided: bool = True,
) -> float:
    """Calculate the minimum detectable effect for a paired comparison.

    Given a fixed sample size, what is the smallest effect that can
    be detected with the desired power?

    Parameters
    ----------
    n_participants
        Number of paired participants.
    sigma
        Within-group standard deviation.
    power
        Desired power.
    alpha
        Significance level.
    two_sided
        Use two-sided test.

    Returns
    -------
    float
        Minimum detectable effect size.

    Examples
    --------
    >>> mde = sensitivity_paired(n_participants=6, power=0.80)
    >>> print(f"Minimum detectable effect: {mde:.2f} SD")
    """
    if n_participants < 2:
        return np.inf

    if two_sided:
        z_alpha = stats.norm.ppf(1 - alpha / 2)
    else:
        z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(power)

    # δ = σ × √(2/n) × (z_α + z_β)
    se_unit = sigma * np.sqrt(2.0 / n_participants)
    return float(se_unit * (z_alpha + z_beta))