Source code for sctrial.stats.effect_size

"""Effect size calculations for trial-aware inference.

This module provides standardized effect size measures (Cohen's d, Hedge's g)
and confidence intervals for DiD analyses.

Statistical Background
----------------------
**Cohen's d** and **Hedge's g** are standardized effect size measures that express
the difference between groups in standard deviation units, enabling comparison
across studies with different scales.

**Cohen's d vs Bootstrap CI**: These serve complementary purposes:
- Cohen's d: Standardized effect size for meta-analysis and interpretation
- Bootstrap CI: Uncertainty quantification on any estimator

**Recommendation**: Report BOTH standardized effect sizes (for interpretation/meta-analysis)
AND confidence intervals (for statistical inference). Bootstrap CI on Cohen's d
provides the best of both worlds.

**Hedge's g vs Cohen's d**: Hedge's g applies a small-sample correction factor
(J = 1 - 3/(4(n₁+n₂-2)-1) = 1 - 3/(4*df - 1)) that reduces upward bias in small samples. Use Hedge's g when
n < 20 per group; for larger samples they are nearly identical.

Mathematical Definitions
------------------------
Cohen's d for DiD:
    d = β_DiD / s_pooled

    where s_pooled = sqrt(((n₁-1)s₁² + (n₂-1)s₂²) / (n₁+n₂-2))

Hedge's g:
    g = d × J

    where J = 1 - 3/(4(n₁+n₂-2)-1) = 1 - 3/(4*df - 1)  (Hedges' correction factor)

95% CI via noncentral t-distribution:
    SE(d) ≈ sqrt((n₁+n₂)/(n₁×n₂) + d²/(2(n₁+n₂-2)))
"""

from __future__ import annotations

import warnings
from typing import TYPE_CHECKING, Literal

import numpy as np
import pandas as pd
from scipy import stats
from scipy.special import gammaln

if TYPE_CHECKING:
    from statsmodels.regression.linear_model import RegressionResultsWrapper

__all__ = [
    "cohens_d",
    "hedges_g",
    "cohens_d_from_did",
    "cohens_d_from_did_approx",
    "effect_size_ci",
    "add_effect_sizes_to_did",
    "EffectSizeMethod",
]

EffectSizeMethod = Literal["cohens_d", "hedges_g"]


[docs] def cohens_d( group1: np.ndarray, group2: np.ndarray, pooled: bool = True, ) -> float: """Calculate Cohen's d effect size between two groups. Cohen's d expresses the difference between group means in standard deviation units: d = (mean₁ - mean₂) / s_pooled Parameters ---------- group1 First group values (e.g., treatment group). group2 Second group values (e.g., control group). pooled If True (default), use pooled standard deviation. If False, use only group1's standard deviation (Glass's delta). Returns ------- float Cohen's d effect size. Positive values indicate group1 > group2. Notes ----- Interpretation guidelines (Cohen, 1988): - ``|d|`` ≈ 0.2: small effect - ``|d|`` ≈ 0.5: medium effect - ``|d|`` ≈ 0.8: large effect These are rough guidelines; practical significance depends on context. """ g1 = np.asarray(group1, dtype=float) g2 = np.asarray(group2, dtype=float) # Remove NaNs g1 = g1[~np.isnan(g1)] g2 = g2[~np.isnan(g2)] n1, n2 = len(g1), len(g2) if n1 < 2 or n2 < 2: return np.nan mean_diff = np.mean(g1) - np.mean(g2) if pooled: var1 = np.var(g1, ddof=1) var2 = np.var(g2, ddof=1) pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2) pooled_sd = np.sqrt(pooled_var) else: # Glass's delta: use only group1's SD pooled_sd = np.std(g1, ddof=1) if pooled_sd < 1e-12: return np.nan return mean_diff / pooled_sd
[docs] def hedges_g( group1: np.ndarray, group2: np.ndarray, ) -> float: """Calculate Hedge's g effect size (bias-corrected Cohen's d). Hedge's g applies a correction factor J to Cohen's d that reduces upward bias in small samples: g = d × J J = 1 - 3/(4(n₁+n₂-2) - 1) (equivalently 1 - 3/(4*df - 1)) Parameters ---------- group1 First group values. group2 Second group values. Returns ------- float Hedge's g effect size. Notes ----- Uses the exact small-sample correction via the gamma function: J = Γ(df/2) / (√(df/2) × Γ((df-1)/2)) which is more accurate than the common approximation for small df. Use Hedge's g when sample sizes are small (n < 20 per group). For larger samples, Cohen's d and Hedge's g are nearly identical. """ g1 = np.asarray(group1, dtype=float) g2 = np.asarray(group2, dtype=float) g1 = g1[~np.isnan(g1)] g2 = g2[~np.isnan(g2)] n1, n2 = len(g1), len(g2) d = cohens_d(g1, g2, pooled=True) if np.isnan(d): return np.nan # Hedges' correction factor df = n1 + n2 - 2 if df <= 0: return np.nan # Exact correction using gamma function # J = Γ(df/2) / (√(df/2) × Γ((df-1)/2)) j = float(np.exp(gammaln(df / 2) - (0.5 * np.log(df / 2)) - gammaln((df - 1) / 2))) return d * j
[docs] def effect_size_ci( d: float, n1: int, n2: int, alpha: float = 0.05, method: Literal["nct", "bootstrap"] = "nct", ) -> tuple[float, float]: """Calculate confidence interval for an effect size. Parameters ---------- d Effect size (Cohen's d or Hedge's g). n1 Sample size of group 1. n2 Sample size of group 2. alpha Significance level (default 0.05 for 95% CI). method - "nct": Wald-type CI using the Hedges & Olkin (1985) large-sample variance approximation with t critical values. Fast and adequate for most sample sizes. - "bootstrap": Not implemented here; use bootstrap_effect_size_ci Returns ------- tuple[float, float] (lower, upper) confidence interval bounds. Notes ----- Uses the Hedges & Olkin (1985) variance formula: SE(d) = √( (n₁+n₂)/(n₁·n₂) + d² / (2·(n₁+n₂-2)) ) with t critical values at df = n₁ + n₂ - 2. """ if np.isnan(d) or n1 < 2 or n2 < 2: return (np.nan, np.nan) if method == "nct": # Approximate SE using Hedges & Olkin (1985) formula se_d = np.sqrt((n1 + n2) / (n1 * n2) + (d**2) / (2 * (n1 + n2 - 2))) df = n1 + n2 - 2 t_crit = stats.t.ppf(1 - alpha / 2, df) lower = d - t_crit * se_d upper = d + t_crit * se_d return (lower, upper) else: raise ValueError(f"Unknown method: {method}. Use 'nct'.")
[docs] def cohens_d_from_did( delta_treated: np.ndarray, delta_control: np.ndarray, ) -> float: """Calculate Cohen's d for DiD from participant-level change scores. This computes the **exact** Cohen's d for the difference in change scores: d = (mean(Δ_treated) - mean(Δ_control)) / s_pooled where s_pooled is the pooled SD of participant-level deltas. Parameters ---------- delta_treated Participant-level change scores (post - pre) for the treated arm. delta_control Participant-level change scores (post - pre) for the control arm. Returns ------- float Cohen's d effect size based on change-score distributions. """ d_t = np.asarray(delta_treated, dtype=float) d_c = np.asarray(delta_control, dtype=float) d_t = d_t[~np.isnan(d_t)] d_c = d_c[~np.isnan(d_c)] if d_t.size < 2 or d_c.size < 2: return np.nan return cohens_d(d_t, d_c, pooled=True)
[docs] def cohens_d_from_did_approx( beta_did: float, residual_std: float, ) -> float: """Approximate Cohen's d from a DiD regression coefficient (deprecated). This uses the regression residual SD instead of pooled SD of change scores, which can mis-scale effect sizes when designs are imbalanced or heteroscedastic. Parameters ---------- beta_did DiD regression coefficient. residual_std Residual standard deviation from the DiD model. Returns ------- float Approximate Cohen's d effect size. Warns ----- DeprecationWarning Always raised; use ``cohens_d_from_did`` instead. Notes ----- Deprecated: use ``cohens_d_from_did(delta_treated, delta_control)`` instead. """ warnings.warn( "cohens_d_from_did_approx() is deprecated. " "Use cohens_d_from_did(delta_treated, delta_control) for exact d.", DeprecationWarning, stacklevel=2, ) if residual_std < 1e-12 or np.isnan(beta_did): return np.nan return beta_did / residual_std
def _compute_effect_size_from_fit( fit: RegressionResultsWrapper, term_name: str, method: EffectSizeMethod = "hedges_g", ) -> dict: """Extract effect size from a fitted regression model. Parameters ---------- fit Fitted statsmodels regression object. term_name Name of the coefficient to compute effect size for. method "cohens_d" or "hedges_g". Returns ------- dict Dictionary with 'd', 'se_d', 'd_lower', 'd_upper' keys. """ if term_name not in fit.params.index: return {"d": np.nan, "se_d": np.nan, "d_lower": np.nan, "d_upper": np.nan} beta = fit.params[term_name] resid_std = np.sqrt(fit.scale) # Residual standard deviation d = beta / resid_std if resid_std > 1e-12 else np.nan # Apply Hedge's correction if requested (exact gamma correction) if method == "hedges_g" and not np.isnan(d): df = fit.df_resid if df > 0: j = float(np.exp(gammaln(df / 2) - (0.5 * np.log(df / 2)) - gammaln((df - 1) / 2))) d = d * j # Approximate CI using Hedges & Olkin (1985) formula: # SE(d) = sqrt((n1+n2)/(n1*n2) + d²/(2*(n1+n2-2))) # Assume balanced design: n1 = n2 = n/2 n = fit.nobs n1 = n2 = max(n / 2, 1) df_resid = max(n1 + n2 - 2, 1) se_d = np.sqrt((n1 + n2) / (n1 * n2) + (d**2) / (2 * df_resid)) if not np.isnan(d) else np.nan if not np.isnan(se_d): t_crit = stats.t.ppf(0.975, fit.df_resid) d_lower = d - t_crit * se_d d_upper = d + t_crit * se_d else: d_lower = d_upper = np.nan return {"d": d, "se_d": se_d, "d_lower": d_lower, "d_upper": d_upper}
[docs] def add_effect_sizes_to_did( df: pd.DataFrame, beta_col: str = "beta_DiD", se_col: str = "se_DiD", n_col: str = "n_units", resid_sd_col: str = "resid_sd", method: EffectSizeMethod = "hedges_g", ) -> pd.DataFrame: """Add standardized effect sizes to a DiD results DataFrame. This function computes Cohen's d or Hedge's g from the DiD coefficients and adds columns for the effect size and its confidence interval. When the ``resid_sd`` column is present (populated by ``did_table``), effect sizes are computed directly as ``beta / resid_sd``. Otherwise, a balanced-design approximation is used as a fallback. Parameters ---------- df DataFrame with DiD results (from did_table). beta_col Name of the coefficient column. se_col Name of the standard error column. n_col Name of the sample size column. resid_sd_col Name of the residual standard deviation column (from OLS/WLS fit). method "cohens_d" or "hedges_g" (recommended for small samples). Returns ------- pd.DataFrame Input DataFrame with added columns: - 'effect_size': Cohen's d or Hedge's g - 'effect_size_lower': Lower 95% CI bound - 'effect_size_upper': Upper 95% CI bound - 'effect_size_interpretation': "small", "medium", "large" Examples -------- >>> res = did_table(adata, features=genes, design=design, visits=visits) >>> res = add_effect_sizes_to_did(res) >>> print(res[["feature", "beta_DiD", "effect_size", "effect_size_interpretation"]]) """ df = df.copy() has_resid_sd = resid_sd_col in df.columns effect_sizes = [] effect_lower = [] effect_upper = [] interpretations = [] for _, row in df.iterrows(): beta = row.get(beta_col, np.nan) se = row.get(se_col, np.nan) n = row.get(n_col, 10) # Default assumption if pd.isna(beta) or pd.isna(se) or se < 1e-12: effect_sizes.append(np.nan) effect_lower.append(np.nan) effect_upper.append(np.nan) interpretations.append("") continue # Assume balanced arms for CI calculation (n1 = n2 = n/2) n1 = n2 = max(n / 2, 1) # Use residual SD from model fit when available (preferred), # otherwise fall back to balanced-design approximation. resid_sd = row.get(resid_sd_col, np.nan) if has_resid_sd else np.nan if pd.notna(resid_sd) and resid_sd > 1e-12: denom = resid_sd else: # Fallback: approximate residual SD assuming balanced arms denom = se / np.sqrt((n1 + n2) / (n1 * n2)) if n > 0 else se d = beta / denom if denom > 1e-12 else np.nan # Apply Hedge's correction (exact gamma correction) if method == "hedges_g" and not np.isnan(d) and n > 2: df_hedges = n - 2 # Renamed to avoid shadowing DataFrame parameter if df_hedges > 0: j = float( np.exp( gammaln(df_hedges / 2) - (0.5 * np.log(df_hedges / 2)) - gammaln((df_hedges - 1) / 2) ) ) else: j = 1.0 d = d * j # CI via approximate SE (Hedges & Olkin 1985) if not np.isnan(d) and n > 2: df_ci = n - 2 se_d = np.sqrt((n1 + n2) / (n1 * n2) + (d**2) / (2 * df_ci)) t_crit = stats.t.ppf(0.975, df_ci) lower = d - t_crit * se_d upper = d + t_crit * se_d else: lower = upper = np.nan effect_sizes.append(d) effect_lower.append(lower) effect_upper.append(upper) # Interpretation abs_d = abs(d) if not np.isnan(d) else 0 if abs_d < 0.2: interp = "negligible" elif abs_d < 0.5: interp = "small" elif abs_d < 0.8: interp = "medium" else: interp = "large" interpretations.append(interp) df["effect_size"] = effect_sizes df["effect_size_lower"] = effect_lower df["effect_size_upper"] = effect_upper df["effect_size_interpretation"] = interpretations return df
def bootstrap_effect_size_ci( group1: np.ndarray, group2: np.ndarray, method: EffectSizeMethod = "hedges_g", n_boot: int = 999, alpha: float = 0.05, seed: int = 42, *, n_bootstrap: int | None = None, ci: float | None = None, ) -> tuple[float, float, float]: """Compute effect size with bootstrap confidence interval. This provides the most robust CI for effect sizes, especially with non-normal distributions or small samples. Parameters ---------- group1, group2 Arrays of values for each group. method "cohens_d" or "hedges_g". n_boot Number of bootstrap resamples. alpha Significance level for CI (e.g., 0.05 for 95% CI). seed Random seed for reproducibility. n_bootstrap Deprecated alias for n_boot. ci Deprecated alias for confidence level. If provided, alpha = 1 - ci. Returns ------- tuple[float, float, float] (effect_size, ci_lower, ci_upper) """ # Handle deprecated parameter aliases if n_bootstrap is not None: n_boot = n_bootstrap if ci is not None: alpha = 1 - ci rng = np.random.default_rng(seed) g1 = np.asarray(group1, dtype=float) g2 = np.asarray(group2, dtype=float) g1 = g1[~np.isnan(g1)] g2 = g2[~np.isnan(g2)] n1, n2 = len(g1), len(g2) if n1 < 2 or n2 < 2: return (np.nan, np.nan, np.nan) # Point estimate if method == "hedges_g": d_obs = hedges_g(g1, g2) else: d_obs = cohens_d(g1, g2) # Bootstrap boot_d = np.empty(n_boot) for b in range(n_boot): b1 = rng.choice(g1, size=n1, replace=True) b2 = rng.choice(g2, size=n2, replace=True) if method == "hedges_g": boot_d[b] = hedges_g(b1, b2) else: boot_d[b] = cohens_d(b1, b2) # Percentile CI lower = np.nanpercentile(boot_d, 100 * alpha / 2) upper = np.nanpercentile(boot_d, 100 * (1 - alpha / 2)) return (float(d_obs), float(lower), float(upper))