"""Effect size calculations for trial-aware inference.
This module provides standardized effect size measures (Cohen's d, Hedge's g)
and confidence intervals for DiD analyses.
Statistical Background
----------------------
**Cohen's d** and **Hedge's g** are standardized effect size measures that express
the difference between groups in standard deviation units, enabling comparison
across studies with different scales.
**Cohen's d vs Bootstrap CI**: These serve complementary purposes:
- Cohen's d: Standardized effect size for meta-analysis and interpretation
- Bootstrap CI: Uncertainty quantification on any estimator
**Recommendation**: Report BOTH standardized effect sizes (for interpretation/meta-analysis)
AND confidence intervals (for statistical inference). Bootstrap CI on Cohen's d
provides the best of both worlds.
**Hedge's g vs Cohen's d**: Hedge's g applies a small-sample correction factor
(J = 1 - 3/(4(n₁+n₂-2)-1) = 1 - 3/(4*df - 1)) that reduces upward bias in small samples. Use Hedge's g when
n < 20 per group; for larger samples they are nearly identical.
Mathematical Definitions
------------------------
Cohen's d for DiD:
d = β_DiD / s_pooled
where s_pooled = sqrt(((n₁-1)s₁² + (n₂-1)s₂²) / (n₁+n₂-2))
Hedge's g:
g = d × J
where J = 1 - 3/(4(n₁+n₂-2)-1) = 1 - 3/(4*df - 1) (Hedges' correction factor)
95% CI via noncentral t-distribution:
SE(d) ≈ sqrt((n₁+n₂)/(n₁×n₂) + d²/(2(n₁+n₂-2)))
"""
from __future__ import annotations
import warnings
from typing import TYPE_CHECKING, Literal
import numpy as np
import pandas as pd
from scipy import stats
from scipy.special import gammaln
if TYPE_CHECKING:
from statsmodels.regression.linear_model import RegressionResultsWrapper
__all__ = [
"cohens_d",
"hedges_g",
"cohens_d_from_did",
"cohens_d_from_did_approx",
"effect_size_ci",
"add_effect_sizes_to_did",
"EffectSizeMethod",
]
EffectSizeMethod = Literal["cohens_d", "hedges_g"]
[docs]
def cohens_d(
group1: np.ndarray,
group2: np.ndarray,
pooled: bool = True,
) -> float:
"""Calculate Cohen's d effect size between two groups.
Cohen's d expresses the difference between group means in standard
deviation units:
d = (mean₁ - mean₂) / s_pooled
Parameters
----------
group1
First group values (e.g., treatment group).
group2
Second group values (e.g., control group).
pooled
If True (default), use pooled standard deviation.
If False, use only group1's standard deviation (Glass's delta).
Returns
-------
float
Cohen's d effect size. Positive values indicate group1 > group2.
Notes
-----
Interpretation guidelines (Cohen, 1988):
- ``|d|`` ≈ 0.2: small effect
- ``|d|`` ≈ 0.5: medium effect
- ``|d|`` ≈ 0.8: large effect
These are rough guidelines; practical significance depends on context.
"""
g1 = np.asarray(group1, dtype=float)
g2 = np.asarray(group2, dtype=float)
# Remove NaNs
g1 = g1[~np.isnan(g1)]
g2 = g2[~np.isnan(g2)]
n1, n2 = len(g1), len(g2)
if n1 < 2 or n2 < 2:
return np.nan
mean_diff = np.mean(g1) - np.mean(g2)
if pooled:
var1 = np.var(g1, ddof=1)
var2 = np.var(g2, ddof=1)
pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
pooled_sd = np.sqrt(pooled_var)
else:
# Glass's delta: use only group1's SD
pooled_sd = np.std(g1, ddof=1)
if pooled_sd < 1e-12:
return np.nan
return mean_diff / pooled_sd
[docs]
def hedges_g(
group1: np.ndarray,
group2: np.ndarray,
) -> float:
"""Calculate Hedge's g effect size (bias-corrected Cohen's d).
Hedge's g applies a correction factor J to Cohen's d that reduces
upward bias in small samples:
g = d × J
J = 1 - 3/(4(n₁+n₂-2) - 1) (equivalently 1 - 3/(4*df - 1))
Parameters
----------
group1
First group values.
group2
Second group values.
Returns
-------
float
Hedge's g effect size.
Notes
-----
Uses the exact small-sample correction via the gamma function:
J = Γ(df/2) / (√(df/2) × Γ((df-1)/2))
which is more accurate than the common approximation for small df.
Use Hedge's g when sample sizes are small (n < 20 per group).
For larger samples, Cohen's d and Hedge's g are nearly identical.
"""
g1 = np.asarray(group1, dtype=float)
g2 = np.asarray(group2, dtype=float)
g1 = g1[~np.isnan(g1)]
g2 = g2[~np.isnan(g2)]
n1, n2 = len(g1), len(g2)
d = cohens_d(g1, g2, pooled=True)
if np.isnan(d):
return np.nan
# Hedges' correction factor
df = n1 + n2 - 2
if df <= 0:
return np.nan
# Exact correction using gamma function
# J = Γ(df/2) / (√(df/2) × Γ((df-1)/2))
j = float(np.exp(gammaln(df / 2) - (0.5 * np.log(df / 2)) - gammaln((df - 1) / 2)))
return d * j
[docs]
def effect_size_ci(
d: float,
n1: int,
n2: int,
alpha: float = 0.05,
method: Literal["nct", "bootstrap"] = "nct",
) -> tuple[float, float]:
"""Calculate confidence interval for an effect size.
Parameters
----------
d
Effect size (Cohen's d or Hedge's g).
n1
Sample size of group 1.
n2
Sample size of group 2.
alpha
Significance level (default 0.05 for 95% CI).
method
- "nct": Wald-type CI using the Hedges & Olkin (1985) large-sample
variance approximation with t critical values. Fast and adequate for
most sample sizes.
- "bootstrap": Not implemented here; use bootstrap_effect_size_ci
Returns
-------
tuple[float, float]
(lower, upper) confidence interval bounds.
Notes
-----
Uses the Hedges & Olkin (1985) variance formula:
SE(d) = √( (n₁+n₂)/(n₁·n₂) + d² / (2·(n₁+n₂-2)) )
with t critical values at df = n₁ + n₂ - 2.
"""
if np.isnan(d) or n1 < 2 or n2 < 2:
return (np.nan, np.nan)
if method == "nct":
# Approximate SE using Hedges & Olkin (1985) formula
se_d = np.sqrt((n1 + n2) / (n1 * n2) + (d**2) / (2 * (n1 + n2 - 2)))
df = n1 + n2 - 2
t_crit = stats.t.ppf(1 - alpha / 2, df)
lower = d - t_crit * se_d
upper = d + t_crit * se_d
return (lower, upper)
else:
raise ValueError(f"Unknown method: {method}. Use 'nct'.")
[docs]
def cohens_d_from_did(
delta_treated: np.ndarray,
delta_control: np.ndarray,
) -> float:
"""Calculate Cohen's d for DiD from participant-level change scores.
This computes the **exact** Cohen's d for the difference in change scores:
d = (mean(Δ_treated) - mean(Δ_control)) / s_pooled
where s_pooled is the pooled SD of participant-level deltas.
Parameters
----------
delta_treated
Participant-level change scores (post - pre) for the treated arm.
delta_control
Participant-level change scores (post - pre) for the control arm.
Returns
-------
float
Cohen's d effect size based on change-score distributions.
"""
d_t = np.asarray(delta_treated, dtype=float)
d_c = np.asarray(delta_control, dtype=float)
d_t = d_t[~np.isnan(d_t)]
d_c = d_c[~np.isnan(d_c)]
if d_t.size < 2 or d_c.size < 2:
return np.nan
return cohens_d(d_t, d_c, pooled=True)
[docs]
def cohens_d_from_did_approx(
beta_did: float,
residual_std: float,
) -> float:
"""Approximate Cohen's d from a DiD regression coefficient (deprecated).
This uses the regression residual SD instead of pooled SD of change scores,
which can mis-scale effect sizes when designs are imbalanced or heteroscedastic.
Parameters
----------
beta_did
DiD regression coefficient.
residual_std
Residual standard deviation from the DiD model.
Returns
-------
float
Approximate Cohen's d effect size.
Warns
-----
DeprecationWarning
Always raised; use ``cohens_d_from_did`` instead.
Notes
-----
Deprecated: use ``cohens_d_from_did(delta_treated, delta_control)`` instead.
"""
warnings.warn(
"cohens_d_from_did_approx() is deprecated. "
"Use cohens_d_from_did(delta_treated, delta_control) for exact d.",
DeprecationWarning,
stacklevel=2,
)
if residual_std < 1e-12 or np.isnan(beta_did):
return np.nan
return beta_did / residual_std
def _compute_effect_size_from_fit(
fit: RegressionResultsWrapper,
term_name: str,
method: EffectSizeMethod = "hedges_g",
) -> dict:
"""Extract effect size from a fitted regression model.
Parameters
----------
fit
Fitted statsmodels regression object.
term_name
Name of the coefficient to compute effect size for.
method
"cohens_d" or "hedges_g".
Returns
-------
dict
Dictionary with 'd', 'se_d', 'd_lower', 'd_upper' keys.
"""
if term_name not in fit.params.index:
return {"d": np.nan, "se_d": np.nan, "d_lower": np.nan, "d_upper": np.nan}
beta = fit.params[term_name]
resid_std = np.sqrt(fit.scale) # Residual standard deviation
d = beta / resid_std if resid_std > 1e-12 else np.nan
# Apply Hedge's correction if requested (exact gamma correction)
if method == "hedges_g" and not np.isnan(d):
df = fit.df_resid
if df > 0:
j = float(np.exp(gammaln(df / 2) - (0.5 * np.log(df / 2)) - gammaln((df - 1) / 2)))
d = d * j
# Approximate CI using Hedges & Olkin (1985) formula:
# SE(d) = sqrt((n1+n2)/(n1*n2) + d²/(2*(n1+n2-2)))
# Assume balanced design: n1 = n2 = n/2
n = fit.nobs
n1 = n2 = max(n / 2, 1)
df_resid = max(n1 + n2 - 2, 1)
se_d = np.sqrt((n1 + n2) / (n1 * n2) + (d**2) / (2 * df_resid)) if not np.isnan(d) else np.nan
if not np.isnan(se_d):
t_crit = stats.t.ppf(0.975, fit.df_resid)
d_lower = d - t_crit * se_d
d_upper = d + t_crit * se_d
else:
d_lower = d_upper = np.nan
return {"d": d, "se_d": se_d, "d_lower": d_lower, "d_upper": d_upper}
[docs]
def add_effect_sizes_to_did(
df: pd.DataFrame,
beta_col: str = "beta_DiD",
se_col: str = "se_DiD",
n_col: str = "n_units",
resid_sd_col: str = "resid_sd",
method: EffectSizeMethod = "hedges_g",
) -> pd.DataFrame:
"""Add standardized effect sizes to a DiD results DataFrame.
This function computes Cohen's d or Hedge's g from the DiD coefficients
and adds columns for the effect size and its confidence interval.
When the ``resid_sd`` column is present (populated by ``did_table``),
effect sizes are computed directly as ``beta / resid_sd``. Otherwise,
a balanced-design approximation is used as a fallback.
Parameters
----------
df
DataFrame with DiD results (from did_table).
beta_col
Name of the coefficient column.
se_col
Name of the standard error column.
n_col
Name of the sample size column.
resid_sd_col
Name of the residual standard deviation column (from OLS/WLS fit).
method
"cohens_d" or "hedges_g" (recommended for small samples).
Returns
-------
pd.DataFrame
Input DataFrame with added columns:
- 'effect_size': Cohen's d or Hedge's g
- 'effect_size_lower': Lower 95% CI bound
- 'effect_size_upper': Upper 95% CI bound
- 'effect_size_interpretation': "small", "medium", "large"
Examples
--------
>>> res = did_table(adata, features=genes, design=design, visits=visits)
>>> res = add_effect_sizes_to_did(res)
>>> print(res[["feature", "beta_DiD", "effect_size", "effect_size_interpretation"]])
"""
df = df.copy()
has_resid_sd = resid_sd_col in df.columns
effect_sizes = []
effect_lower = []
effect_upper = []
interpretations = []
for _, row in df.iterrows():
beta = row.get(beta_col, np.nan)
se = row.get(se_col, np.nan)
n = row.get(n_col, 10) # Default assumption
if pd.isna(beta) or pd.isna(se) or se < 1e-12:
effect_sizes.append(np.nan)
effect_lower.append(np.nan)
effect_upper.append(np.nan)
interpretations.append("")
continue
# Assume balanced arms for CI calculation (n1 = n2 = n/2)
n1 = n2 = max(n / 2, 1)
# Use residual SD from model fit when available (preferred),
# otherwise fall back to balanced-design approximation.
resid_sd = row.get(resid_sd_col, np.nan) if has_resid_sd else np.nan
if pd.notna(resid_sd) and resid_sd > 1e-12:
denom = resid_sd
else:
# Fallback: approximate residual SD assuming balanced arms
denom = se / np.sqrt((n1 + n2) / (n1 * n2)) if n > 0 else se
d = beta / denom if denom > 1e-12 else np.nan
# Apply Hedge's correction (exact gamma correction)
if method == "hedges_g" and not np.isnan(d) and n > 2:
df_hedges = n - 2 # Renamed to avoid shadowing DataFrame parameter
if df_hedges > 0:
j = float(
np.exp(
gammaln(df_hedges / 2)
- (0.5 * np.log(df_hedges / 2))
- gammaln((df_hedges - 1) / 2)
)
)
else:
j = 1.0
d = d * j
# CI via approximate SE (Hedges & Olkin 1985)
if not np.isnan(d) and n > 2:
df_ci = n - 2
se_d = np.sqrt((n1 + n2) / (n1 * n2) + (d**2) / (2 * df_ci))
t_crit = stats.t.ppf(0.975, df_ci)
lower = d - t_crit * se_d
upper = d + t_crit * se_d
else:
lower = upper = np.nan
effect_sizes.append(d)
effect_lower.append(lower)
effect_upper.append(upper)
# Interpretation
abs_d = abs(d) if not np.isnan(d) else 0
if abs_d < 0.2:
interp = "negligible"
elif abs_d < 0.5:
interp = "small"
elif abs_d < 0.8:
interp = "medium"
else:
interp = "large"
interpretations.append(interp)
df["effect_size"] = effect_sizes
df["effect_size_lower"] = effect_lower
df["effect_size_upper"] = effect_upper
df["effect_size_interpretation"] = interpretations
return df
def bootstrap_effect_size_ci(
group1: np.ndarray,
group2: np.ndarray,
method: EffectSizeMethod = "hedges_g",
n_boot: int = 999,
alpha: float = 0.05,
seed: int = 42,
*,
n_bootstrap: int | None = None,
ci: float | None = None,
) -> tuple[float, float, float]:
"""Compute effect size with bootstrap confidence interval.
This provides the most robust CI for effect sizes, especially
with non-normal distributions or small samples.
Parameters
----------
group1, group2
Arrays of values for each group.
method
"cohens_d" or "hedges_g".
n_boot
Number of bootstrap resamples.
alpha
Significance level for CI (e.g., 0.05 for 95% CI).
seed
Random seed for reproducibility.
n_bootstrap
Deprecated alias for n_boot.
ci
Deprecated alias for confidence level. If provided, alpha = 1 - ci.
Returns
-------
tuple[float, float, float]
(effect_size, ci_lower, ci_upper)
"""
# Handle deprecated parameter aliases
if n_bootstrap is not None:
n_boot = n_bootstrap
if ci is not None:
alpha = 1 - ci
rng = np.random.default_rng(seed)
g1 = np.asarray(group1, dtype=float)
g2 = np.asarray(group2, dtype=float)
g1 = g1[~np.isnan(g1)]
g2 = g2[~np.isnan(g2)]
n1, n2 = len(g1), len(g2)
if n1 < 2 or n2 < 2:
return (np.nan, np.nan, np.nan)
# Point estimate
if method == "hedges_g":
d_obs = hedges_g(g1, g2)
else:
d_obs = cohens_d(g1, g2)
# Bootstrap
boot_d = np.empty(n_boot)
for b in range(n_boot):
b1 = rng.choice(g1, size=n1, replace=True)
b2 = rng.choice(g2, size=n2, replace=True)
if method == "hedges_g":
boot_d[b] = hedges_g(b1, b2)
else:
boot_d[b] = cohens_d(b1, b2)
# Percentile CI
lower = np.nanpercentile(boot_d, 100 * alpha / 2)
upper = np.nanpercentile(boot_d, 100 * (1 - alpha / 2))
return (float(d_obs), float(lower), float(upper))