"""Power analysis utilities for trial planning.
This module provides sample size and power calculations for both
**two-arm DiD** and **single-arm paired** (pre/post) designs in
single-cell studies.
Mathematical Background
-----------------------
**Two-arm DiD** (treatment × time interaction):
Power = Φ(|δ|/SE - z_{1-α/2}), SE = σ × √(4/n)
n = 4σ²(z_{1-α/2} + z_{1-β})² / δ²
**Single-arm paired** (within-arm pre→post change):
Power = Φ(|δ|/SE - z_{1-α/2}), SE = σ × √(2/n)
n = 2σ²(z_{1-α/2} + z_{1-β})² / δ²
The paired design has half the variance of DiD (factor 2 vs 4)
because it involves one arm instead of two.
For cluster-robust inference with ICC (intraclass correlation):
Design effect = 1 + (m-1)×ICC
where m = average cluster size.
The effective sample size is:
n_eff = n / design_effect
Key Considerations for Single-Cell Studies
-------------------------------------------
1. **Unit of analysis**: Participants, not cells. Power is driven by the
number of participants, not cells per participant.
2. **ICC matters**: High within-participant correlation (cells from same donor
are similar) inflates variance estimates. Account for this in power calculations.
3. **Cell-type stratification**: Testing multiple cell types increases multiple
testing burden but may reveal cell-type-specific effects.
4. **Effect size uncertainty**: Use pilot data or published studies to estimate
expected effect sizes. Be conservative.
"""
from __future__ import annotations
import numpy as np
from scipy import stats
__all__ = [
"power_did",
"sample_size_did",
"power_curve",
"power_paired",
"sample_size_paired",
"sensitivity_paired",
"design_effect",
"effective_sample_size",
]
[docs]
def design_effect(
cluster_size: float,
icc: float,
) -> float:
"""Calculate the design effect for clustered data.
The design effect (DEFF) quantifies how much clustering inflates
variance compared to simple random sampling:
DEFF = 1 + (m - 1) × ICC
where m is the average cluster size and ICC is the intraclass
correlation coefficient.
Parameters
----------
cluster_size
Average number of observations per cluster (e.g., cells per participant).
icc
Intraclass correlation coefficient (0 to 1).
- ICC ≈ 0: observations within clusters are independent
- ICC ≈ 1: observations within clusters are identical
Returns
-------
float
Design effect. Values > 1 indicate variance inflation.
Examples
--------
>>> # 100 cells per participant, ICC=0.05
>>> deff = design_effect(100, 0.05)
>>> print(f"Design effect: {deff:.1f}")
Design effect: 5.95
"""
if not (0 <= icc <= 1):
raise ValueError("icc must be between 0 and 1.")
if not np.isfinite(cluster_size) or cluster_size <= 0:
raise ValueError("cluster_size must be > 0.")
return 1 + (cluster_size - 1) * icc
[docs]
def effective_sample_size(
n_clusters: int,
cluster_size: float,
icc: float,
) -> float:
"""Calculate the effective sample size accounting for clustering.
The effective sample size is the equivalent number of independent
observations after accounting for within-cluster correlation:
n_eff = (n_clusters × cluster_size) / DEFF
Parameters
----------
n_clusters
Number of clusters (e.g., participants).
cluster_size
Average observations per cluster.
icc
Intraclass correlation coefficient.
Returns
-------
float
Effective sample size.
Notes
-----
For single-cell studies, the effective sample size is typically
close to the number of participants, not the number of cells,
when aggregating to participant level.
"""
if not np.isfinite(n_clusters) or n_clusters <= 0:
raise ValueError("n_clusters must be > 0.")
deff = design_effect(cluster_size, icc)
n_total = n_clusters * cluster_size
return n_total / deff
[docs]
def power_did(
n_per_group: int,
effect_size: float,
sigma: float = 1.0,
alpha: float = 0.05,
two_sided: bool = True,
icc: float = 0.0,
cluster_size: float = 1.0,
) -> float:
"""Calculate statistical power for a DiD analysis.
This function computes the power to detect a given DiD effect
(treatment × time interaction) with the specified sample size.
Model:
Y_it = α + β₁×Treat + β₂×Post + β₃×(Treat×Post) + ε
H₀: β₃ = 0 (no differential change)
H₁: β₃ ≠ 0 (treatment causes differential change)
Parameters
----------
n_per_group
Number of participants per arm (assumes balanced design).
Total participants = 2 × n_per_group.
effect_size
Expected DiD effect size (β₃) in original units.
Can also be Cohen's d if sigma=1.
sigma
Within-group standard deviation of the outcome.
alpha
Significance level (default 0.05).
two_sided
If True (default), use two-sided test.
icc
Intraclass correlation for cluster adjustment.
cluster_size
Average cluster size (e.g., cells per participant).
Returns
-------
float
Statistical power (probability of rejecting H₀ when H₁ is true).
Examples
--------
>>> # 20 participants per arm, effect = 0.5 SD
>>> pwr = power_did(n_per_group=20, effect_size=0.5, sigma=1.0)
>>> print(f"Power: {pwr:.1%}")
Power: 69.8%
>>> # With clustering (100 cells/participant, ICC=0.05)
>>> pwr = power_did(20, 0.5, icc=0.05, cluster_size=100)
>>> print(f"Power with clustering: {pwr:.1%}")
Notes
-----
For DiD, the variance of the interaction term is approximately:
Var(β₃) = 4σ² / n
where n is the total number of participants (not cells).
"""
if not np.isfinite(n_per_group) or n_per_group < 1:
raise ValueError("n_per_group must be a finite integer >= 1.")
if int(n_per_group) != n_per_group:
raise ValueError("n_per_group must be an integer.")
if not np.isfinite(effect_size):
raise ValueError("effect_size must be finite.")
if not np.isfinite(sigma) or sigma <= 0:
raise ValueError("sigma must be a finite value > 0.")
if not (0 < alpha < 1):
raise ValueError("alpha must be between 0 and 1.")
if not (0 <= icc <= 1):
raise ValueError("icc must be between 0 and 1.")
if not np.isfinite(cluster_size) or cluster_size < 1:
raise ValueError("cluster_size must be >= 1.")
if n_per_group < 2:
return 0.0
# Total participants
n_total = 2 * n_per_group
# Apply design effect if clustering
if icc > 0 and cluster_size > 1:
deff = design_effect(cluster_size, icc)
n_eff = n_total / deff
else:
n_eff = n_total
# Standard error of DiD coefficient
# For balanced 2x2 DiD: SE = σ × √(4/n)
se = sigma * np.sqrt(4 / n_eff)
# Noncentrality parameter
ncp = abs(effect_size) / se
# Critical value
if two_sided:
z_crit = stats.norm.ppf(1 - alpha / 2)
else:
z_crit = stats.norm.ppf(1 - alpha)
# Power = P(Z > z_crit - ncp) for one-sided
# For two-sided, use both tails
if two_sided:
power = stats.norm.sf(z_crit - ncp) + stats.norm.cdf(-z_crit - ncp)
else:
power = stats.norm.sf(z_crit - ncp)
return float(power)
[docs]
def sample_size_did(
effect_size: float,
sigma: float = 1.0,
power: float = 0.80,
alpha: float = 0.05,
two_sided: bool = True,
icc: float = 0.0,
cluster_size: float = 1.0,
) -> int:
"""Calculate required sample size for a DiD analysis.
Determines the number of participants per arm needed to achieve
the desired power for detecting a given effect size.
Parameters
----------
effect_size
Minimum detectable DiD effect size.
sigma
Expected within-group standard deviation.
power
Desired statistical power (default 0.80).
alpha
Significance level (default 0.05).
two_sided
If True (default), use two-sided test.
icc
Intraclass correlation for cluster adjustment.
cluster_size
Average cluster size.
Returns
-------
int
Required participants per arm (total = 2 × this value).
Examples
--------
>>> # Sample size to detect d=0.5 with 80% power
>>> n = sample_size_did(effect_size=0.5, sigma=1.0, power=0.80)
>>> print(f"Need {n} participants per arm ({2*n} total)")
>>> # With multiple testing correction (Bonferroni for 10 tests)
>>> n = sample_size_did(effect_size=0.5, alpha=0.05/10, power=0.80)
>>> print(f"With Bonferroni: {n} per arm")
"""
if not np.isfinite(effect_size) or effect_size == 0:
raise ValueError("effect_size must be non-zero for sample size calculation.")
if effect_size < 0:
raise ValueError("effect_size must be > 0.")
if not np.isfinite(sigma) or sigma <= 0:
raise ValueError("sigma must be a finite value > 0.")
if not (0 < power < 1):
raise ValueError("power must be between 0 and 1.")
if not (0 < alpha < 1):
raise ValueError("alpha must be between 0 and 1.")
if not (0 <= icc <= 1):
raise ValueError("icc must be between 0 and 1.")
if not np.isfinite(cluster_size) or cluster_size < 1:
raise ValueError("cluster_size must be >= 1.")
# Critical values
if two_sided:
z_alpha = stats.norm.ppf(1 - alpha / 2)
else:
z_alpha = stats.norm.ppf(1 - alpha)
z_beta = stats.norm.ppf(power)
# Base sample size formula for DiD
# n = 4σ²(z_α + z_β)² / δ²
n_base = 4 * (sigma**2) * ((z_alpha + z_beta) ** 2) / (effect_size**2)
# Apply design effect
if icc > 0 and cluster_size > 1:
deff = design_effect(cluster_size, icc)
n_required = n_base * deff
else:
n_required = n_base
# Participants per arm (divide total by 2)
n_per_arm = int(np.ceil(n_required / 2))
return max(n_per_arm, 2) # Minimum 2 per arm
[docs]
def power_curve(
n_range: tuple[int, int] = (5, 50),
effect_size: float = 0.5,
sigma: float = 1.0,
alpha: float = 0.05,
icc: float = 0.0,
cluster_size: float = 1.0,
n_points: int = 20,
) -> tuple[np.ndarray, np.ndarray]:
"""Generate a power curve across sample sizes.
Parameters
----------
n_range
(min, max) participants per arm.
effect_size
Expected DiD effect size.
sigma
Within-group standard deviation.
alpha
Significance level.
icc
Intraclass correlation.
cluster_size
Average cluster size.
n_points
Number of points to compute.
Returns
-------
tuple[np.ndarray, np.ndarray]
(sample_sizes, powers) arrays.
Examples
--------
>>> ns, powers = power_curve(n_range=(10, 100), effect_size=0.3)
>>> import matplotlib.pyplot as plt
>>> plt.plot(ns, powers)
>>> plt.axhline(0.8, linestyle='--', label='80% power')
>>> plt.xlabel('Participants per arm')
>>> plt.ylabel('Power')
>>> plt.show()
"""
ns = np.linspace(n_range[0], n_range[1], n_points).astype(int)
powers = np.array(
[power_did(n, effect_size, sigma, alpha, icc=icc, cluster_size=cluster_size) for n in ns]
)
return ns, powers
def sensitivity_analysis(
n_per_group: int,
sigma: float = 1.0,
power: float = 0.80,
alpha: float = 0.05,
two_sided: bool = True,
) -> float:
"""Calculate the minimum detectable effect size for given power.
This is the inverse of sample_size_did: given a fixed sample size,
what is the smallest effect that can be detected?
Parameters
----------
n_per_group
Number of participants per arm.
sigma
Within-group standard deviation.
power
Desired power.
alpha
Significance level.
two_sided
Use two-sided test.
Returns
-------
float
Minimum detectable effect size.
Examples
--------
>>> # What effect can we detect with 15 per arm?
>>> mde = sensitivity_analysis(n_per_group=15, power=0.80)
>>> print(f"Minimum detectable effect: {mde:.2f} SD")
"""
if n_per_group < 2:
return np.inf
n_total = 2 * n_per_group
if two_sided:
z_alpha = stats.norm.ppf(1 - alpha / 2)
else:
z_alpha = stats.norm.ppf(1 - alpha)
z_beta = stats.norm.ppf(power)
# Solve for effect size: δ = σ × √(4/n) × (z_α + z_β)
se_unit = sigma * np.sqrt(4 / n_total)
mde = se_unit * (z_alpha + z_beta)
return mde
# ======================================================================
# Paired (single-arm pre/post) power functions
# ======================================================================
[docs]
def power_paired(
n_participants: int,
effect_size: float,
sigma: float = 1.0,
alpha: float = 0.05,
two_sided: bool = True,
) -> float:
"""Calculate statistical power for a paired pre/post comparison.
This function computes the power to detect a given within-arm
longitudinal effect (post − pre) with the specified sample size.
Model:
Y_post - Y_pre = δ + ε (paired differences)
H₀: δ = 0 (no change)
H₁: δ ≠ 0 (treatment causes change)
Parameters
----------
n_participants
Number of participants with paired measurements.
effect_size
Expected pre-to-post effect size (δ) in original units.
Can also be Cohen's d if sigma=1.
sigma
Within-group standard deviation of the outcome (not the
paired difference SD; the paired difference variance is 2σ²).
alpha
Significance level (default 0.05).
two_sided
If True (default), use two-sided test.
Returns
-------
float
Statistical power (probability of rejecting H₀ when H₁ is true).
Examples
--------
>>> # 6 participants, effect = 0.8 SD
>>> pwr = power_paired(n_participants=6, effect_size=0.8, sigma=1.0)
>>> print(f"Power: {pwr:.1%}")
Notes
-----
For paired pre/post, the variance of the time coefficient is:
Var(β_time) = 2σ² / n
where n is the number of paired participants. This is half the
variance of a DiD coefficient (4σ²/n) because there is only one
arm instead of two.
"""
if not np.isfinite(n_participants) or n_participants < 1:
raise ValueError("n_participants must be a finite integer >= 1.")
if int(n_participants) != n_participants:
raise ValueError("n_participants must be an integer.")
if not np.isfinite(effect_size):
raise ValueError("effect_size must be finite.")
if not np.isfinite(sigma) or sigma <= 0:
raise ValueError("sigma must be a finite value > 0.")
if not (0 < alpha < 1):
raise ValueError("alpha must be between 0 and 1.")
if n_participants < 2:
return 0.0
# Standard error for paired comparison: SE = σ × √(2/n)
se = sigma * np.sqrt(2.0 / n_participants)
# Noncentrality parameter
ncp = abs(effect_size) / se
# Critical value
if two_sided:
z_crit = stats.norm.ppf(1 - alpha / 2)
else:
z_crit = stats.norm.ppf(1 - alpha)
# Power
if two_sided:
power = stats.norm.sf(z_crit - ncp) + stats.norm.cdf(-z_crit - ncp)
else:
power = stats.norm.sf(z_crit - ncp)
return float(power)
[docs]
def sample_size_paired(
effect_size: float,
sigma: float = 1.0,
power: float = 0.80,
alpha: float = 0.05,
two_sided: bool = True,
) -> int:
"""Calculate required sample size for a paired pre/post comparison.
Determines the number of participants with paired measurements
needed to achieve the desired power.
Parameters
----------
effect_size
Minimum detectable effect size (must be > 0).
sigma
Expected within-group standard deviation.
power
Desired statistical power (default 0.80).
alpha
Significance level (default 0.05).
two_sided
If True (default), use two-sided test.
Returns
-------
int
Required number of paired participants.
Examples
--------
>>> n = sample_size_paired(effect_size=0.5, sigma=1.0, power=0.80)
>>> print(f"Need {n} paired participants")
"""
if not np.isfinite(effect_size) or effect_size <= 0:
raise ValueError("effect_size must be a finite value > 0.")
if not np.isfinite(sigma) or sigma <= 0:
raise ValueError("sigma must be a finite value > 0.")
if not (0 < power < 1):
raise ValueError("power must be between 0 and 1.")
if not (0 < alpha < 1):
raise ValueError("alpha must be between 0 and 1.")
if two_sided:
z_alpha = stats.norm.ppf(1 - alpha / 2)
else:
z_alpha = stats.norm.ppf(1 - alpha)
z_beta = stats.norm.ppf(power)
# n = 2σ²(z_α + z_β)² / δ²
n = 2.0 * sigma**2 * (z_alpha + z_beta) ** 2 / effect_size**2
return max(int(np.ceil(n)), 2)
[docs]
def sensitivity_paired(
n_participants: int,
sigma: float = 1.0,
power: float = 0.80,
alpha: float = 0.05,
two_sided: bool = True,
) -> float:
"""Calculate the minimum detectable effect for a paired comparison.
Given a fixed sample size, what is the smallest effect that can
be detected with the desired power?
Parameters
----------
n_participants
Number of paired participants.
sigma
Within-group standard deviation.
power
Desired power.
alpha
Significance level.
two_sided
Use two-sided test.
Returns
-------
float
Minimum detectable effect size.
Examples
--------
>>> mde = sensitivity_paired(n_participants=6, power=0.80)
>>> print(f"Minimum detectable effect: {mde:.2f} SD")
"""
if n_participants < 2:
return np.inf
if two_sided:
z_alpha = stats.norm.ppf(1 - alpha / 2)
else:
z_alpha = stats.norm.ppf(1 - alpha)
z_beta = stats.norm.ppf(power)
# δ = σ × √(2/n) × (z_α + z_β)
se_unit = sigma * np.sqrt(2.0 / n_participants)
return float(se_unit * (z_alpha + z_beta))