Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions tests/test_methodology_sdid.py
Original file line number Diff line number Diff line change
Expand Up @@ -3076,13 +3076,17 @@ def test_baseline_parity_small_scale(self, variance_method):
assert len(r.placebo_effects) == n0

@pytest.mark.parametrize("variance_method", ["placebo", "bootstrap", "jackknife"])
def test_scale_equivariance(self, variance_method):
def test_scale_equivariance(self, variance_method, ci_params):
"""τ/a, SE/|a|, p-value, and n_successful must be invariant under
(Y → a*Y + b) across ~15 orders of magnitude."""
# Pure invariance check (baseline captured at runtime, not vs _BASELINE), so the
# absolute n_bootstrap is irrelevant: r0 and the scaled refits all use the same
# (ci_params-scaled in pure-Python, 200 under Rust) count, preserving equivariance.
nb = ci_params.bootstrap(200)
data = _make_panel(seed=42)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
r0 = self._fit(data, variance_method)
r0 = self._fit(data, variance_method, n_bootstrap=nb)
att0, se0, p0 = r0.att, r0.se, r0.p_value
n0 = len(r0.placebo_effects)
noise0 = r0.noise_level
Expand All @@ -3092,7 +3096,7 @@ def test_scale_equivariance(self, variance_method):
scaled = self._rescale(data, a, b)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
r = self._fit(scaled, variance_method)
r = self._fit(scaled, variance_method, n_bootstrap=nb)
# Variance-method success count must be identical; divergence
# would shift the empirical p-value floor 1/(n+1).
assert len(r.placebo_effects) == n0, (
Expand Down Expand Up @@ -3172,13 +3176,15 @@ class TestPValueSemantics:
null draws either and also use the analytical p-value.
"""

def test_bootstrap_p_value_matches_analytical(self):
def test_bootstrap_p_value_matches_analytical(self, ci_params):
"""Bootstrap p-value must equal safe_inference(att, se)[1]."""
# Self-consistency check (reported p vs the analytical formula on the reported se) —
# independent of the bootstrap draw count, so ci_params scaling is safe.
df = _make_panel(seed=42)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
r = SyntheticDiD(
variance_method="bootstrap", n_bootstrap=200, seed=1
variance_method="bootstrap", n_bootstrap=ci_params.bootstrap(200), seed=1
).fit(
df, outcome="outcome", treatment="treated",
unit="unit", time="period",
Expand All @@ -3189,13 +3195,15 @@ def test_bootstrap_p_value_matches_analytical(self):
f"bootstrap p_value={r.p_value} != analytical {expected_p}"
)

def test_placebo_p_value_uses_empirical_formula(self):
def test_placebo_p_value_uses_empirical_formula(self, ci_params):
"""Placebo p-value must equal max(mean(|draws| >= |att|), 1/(r+1))."""
# Self-consistency check (reported p vs the empirical formula on the reported
# placebo_effects) — independent of the draw count, so ci_params scaling is safe.
df = _make_panel(seed=42)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
r = SyntheticDiD(
variance_method="placebo", n_bootstrap=200, seed=1
variance_method="placebo", n_bootstrap=ci_params.bootstrap(200), seed=1
).fit(
df, outcome="outcome", treatment="treated",
unit="unit", time="period",
Expand Down
106 changes: 90 additions & 16 deletions tests/test_methodology_synthetic_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@
]


# ---------------------------------------------------------------------------
# Cheap optimizer settings for behavior tests (pure-Python CI speed)
# ---------------------------------------------------------------------------
# Behavior tests only need a VALID, cleanly-converged fit, not data-driven V quality.
# The production nested defaults (n_starts=4, inner_max_iter=10000, inner_min_decrease=1e-5)
# cost 30-150s per *pure-Python* fit because the inner Frank-Wolfe solve grinds its slow
# sublinear tail to hit the tight tolerance on every objective evaluation. Loosening the
# inner tolerance + a single start + a small outer cap gives a clean ~0.1s fit without
# changing what these tests assert. Pure-Python coverage of the production-default nested
# path (n_starts=4 with the _v_starts heuristic candidates + the tight inner_min_decrease=1e-5)
# is kept by the dedicated non-slow ``test_nested_production_defaults_smoke`` (a 2-donor panel
# whose inner FW simplex is ~1-D, so defaults stay <0.1s). The @slow Tier-2 Basque test
# additionally covers the defaults in the Rust matrix, and the Rust<->numpy Frank-Wolfe kernel
# equivalence is locked by tests/test_rust_backend.py::test_sc_weight_fw_matches_numpy.
#
# NB: inner_max_iter is deliberately LEFT AT DEFAULT here — the speedup comes from the
# looser tolerance letting FW terminate on *convergence* (not on an iteration cap), so the
# solve stays clean (no non-convergence warning). Do NOT fold inner_max_iter into _FAST or
# the inner-non-convergence warning starts firing spuriously.
# Keyword overrides splatted into behavior-test fits: one start, a 50-iteration outer cap,
# and a loosened inner tolerance. inner_max_iter is intentionally absent (left at default).
_FAST = {
    "n_starts": 1,
    "optimizer_options": {"maxiter": 50},
    "inner_min_decrease": 1e-3,
}
# Overrides for the churn tests, which pass inner_max_iter=1 themselves to force inner
# non-convergence; only the outer optimizer is capped here (5 iterations) so it does not
# run to maxiter on the flat penalty landscape.
_FAST_CHURN = {
    "n_starts": 1,
    "optimizer_options": {"maxiter": 5},
}


# ---------------------------------------------------------------------------
# Synthetic panel builders (fast; no R needed)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -197,8 +222,12 @@ def test_post_periods_canonicalized_and_gap_order_independent():
df, years, T0 = _make_panel()
ordered = years[T0:]
scrambled = list(reversed(ordered)) + [ordered[-1]] # unsorted + duplicate
r1 = synthetic_control(df, "y", "treated", "unit", "year", post_periods=ordered, seed=0)
r2 = synthetic_control(df, "y", "treated", "unit", "year", post_periods=scrambled, seed=0)
r1 = synthetic_control(
df, "y", "treated", "unit", "year", post_periods=ordered, seed=0, **_FAST
)
r2 = synthetic_control(
df, "y", "treated", "unit", "year", post_periods=scrambled, seed=0, **_FAST
)
assert r1.post_periods == r2.post_periods == ordered
assert abs(r1.att - r2.att) < 1e-12
gdf = r2.get_gap_df()
Expand All @@ -214,7 +243,9 @@ def test_post_periods_canonicalized_and_gap_order_independent():

def test_donor_pool_restricts_donors():
df, years, T0 = _make_panel(n_donors=4)
res = synthetic_control(df, "y", "treated", "unit", "year", donor_pool=["d0", "d1"], seed=0)
res = synthetic_control(
df, "y", "treated", "unit", "year", donor_pool=["d0", "d1"], seed=0, **_FAST
)
assert res.n_donors == 2
assert set(res.get_weights_df()["unit"]) <= {"d0", "d1"}

Expand Down Expand Up @@ -309,8 +340,19 @@ def test_outer_v_nonconvergence_warning():
# Outer V-search non-convergence must not be silent (optimizer capped at 1 iter).
df, _, _ = _make_panel()
with pytest.warns(UserWarning, match="Outer V-search"):
# maxiter=1 forces the OUTER non-convergence; n_starts=1 + a loose inner tolerance
# keep the (still-real) inner solves cheap. Loosening inner_min_decrease does not
# affect whether the outer optimizer hits its 1-iteration cap.
synthetic_control(
df, "y", "treated", "unit", "year", seed=0, optimizer_options={"maxiter": 1}
df,
"y",
"treated",
"unit",
"year",
seed=0,
n_starts=1,
optimizer_options={"maxiter": 1},
inner_min_decrease=1e-3,
)


Expand All @@ -319,7 +361,9 @@ def test_inner_v_search_nonconvergence_warning():
# inner_max_iter=1 makes them truncate, and the estimator emits an aggregated warning.
df, _, _ = _make_panel()
with pytest.warns(UserWarning, match="during nested V selection"):
synthetic_control(df, "y", "treated", "unit", "year", seed=0, inner_max_iter=1)
synthetic_control(
df, "y", "treated", "unit", "year", seed=0, inner_max_iter=1, **_FAST_CHURN
)


def test_single_inner_nonconvergence_excluded_from_v_ranking(monkeypatch):
Expand Down Expand Up @@ -348,7 +392,7 @@ def patched(X1s, X0s, v, max_iter, min_decrease):

monkeypatch.setattr(sc, "_inner_solve_W", patched)
with pytest.warns(UserWarning, match="during nested V selection"):
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)

assert state["failed"] # the patch actually fired on an objective evaluation
assert np.isfinite(res.att)
Expand All @@ -361,11 +405,37 @@ def test_n_starts_one_runs():
# n_starts=1 uses only the uniform start (short-circuits the heuristic candidates)
# and still produces a valid nested fit.
df, _, _ = _make_panel()
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, n_starts=1)
res = synthetic_control(
df,
"y",
"treated",
"unit",
"year",
seed=0,
n_starts=1,
optimizer_options={"maxiter": 50},
inner_min_decrease=1e-3,
)
assert np.isfinite(res.att)
assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-6


def test_nested_production_defaults_smoke():
    # Coverage anchor for the production-default nested path in pure-Python: no optimizer
    # overrides are passed, so the fit runs with the library defaults (per this file's
    # notes: n_starts=4, which generates the _v_starts heuristic candidates that the
    # n_starts=1 fast tests skip, plus the tight inner_min_decrease=1e-5).
    #
    # Only two donors are used so the inner Frank-Wolfe simplex is effectively 1-D; that
    # is what keeps the default settings cheap enough for this test to stay non-slow.
    # The @slow Tier-2 Basque test exercises the defaults only in the Rust matrix, so
    # this is its pure-Python counterpart.
    panel, _, _ = _make_panel(n_donors=2)
    fit = synthetic_control(panel, "y", "treated", "unit", "year", seed=0)  # defaults only

    # The estimate must be a real number.
    assert np.isfinite(fit.att)
    # Donor weights must sum to one (within tolerance).
    weight_total = sum(fit.donor_weights.values())
    assert abs(weight_total - 1.0) < 1e-6
    # Both donors from the 2-donor panel were used.
    assert fit.n_donors == 2
    # A nested V search ran and recorded its pre-period MSPE.
    assert fit.mspe_v is not None


def test_non_finite_outcome_rejected():
df, years, T0 = _make_panel()
df = df.copy()
Expand All @@ -378,7 +448,7 @@ def test_distinct_special_period_sets_not_duplicate():
# Same var/op, same endpoints + length, different intermediate period -> distinct
# predictors, must NOT be rejected as duplicates.
df, years, T0 = _make_panel(T=8, T0=6)
res = SyntheticControl(seed=0).fit(
res = SyntheticControl(seed=0, **_FAST).fit(
df,
"y",
"treated",
Expand Down Expand Up @@ -423,6 +493,7 @@ def test_duplicate_predictor_window_periods_deduped():
predictors=["y"],
predictor_window=[years[0], years[0], years[1]],
seed=0,
**_FAST,
)
r_uniq = synthetic_control(
df,
Expand All @@ -433,6 +504,7 @@ def test_duplicate_predictor_window_periods_deduped():
predictors=["y"],
predictor_window=[years[0], years[1]],
seed=0,
**_FAST,
)
assert abs(r_dup.att - r_uniq.att) < 1e-9

Expand Down Expand Up @@ -465,7 +537,7 @@ def test_poor_fit_warning():
rows.append({"unit": "treated", "year": yr, "y": 50 + 2.0 * t, "treated": int(t >= T0)})
df = pd.DataFrame(rows)
with pytest.warns(UserWarning, match="Pre-treatment fit is poor"):
synthetic_control(df, "y", "treated", "unit", "year", seed=0)
synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)


def test_poor_fit_warning_flat_treated_pre_path():
Expand All @@ -484,7 +556,7 @@ def test_poor_fit_warning_flat_treated_pre_path():
)
df = pd.DataFrame(rows)
with pytest.warns(UserWarning, match="Pre-treatment fit is poor"):
synthetic_control(df, "y", "treated", "unit", "year", seed=0)
synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -520,7 +592,7 @@ def test_duplicate_regular_predictor_rejected():
def test_inner_nonconvergence_warning():
df, _, _ = _make_panel(n_donors=4)
with pytest.warns(UserWarning, match="did not converge"):
SyntheticControl(seed=0, v_method="nested", inner_max_iter=1).fit(
SyntheticControl(seed=0, v_method="nested", inner_max_iter=1, **_FAST_CHURN).fit(
df, "y", "treated", "unit", "year"
)

Expand All @@ -532,7 +604,7 @@ def test_inner_nonconvergence_warning():

def test_standardize_none_runs():
df, _, _ = _make_panel()
res = synthetic_control(df, "y", "treated", "unit", "year", standardize="none", seed=0)
res = synthetic_control(df, "y", "treated", "unit", "year", standardize="none", seed=0, **_FAST)
assert res.standardize == "none"
assert np.isfinite(res.att)

Expand Down Expand Up @@ -652,7 +724,7 @@ def test_set_params_rolls_back_on_invalid():

def test_nan_inference_contract():
df, _, _ = _make_panel()
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
assert_nan_inference(
{"se": res.se, "t_stat": res.t_stat, "p_value": res.p_value, "conf_int": res.conf_int}
)
Expand All @@ -661,7 +733,7 @@ def test_nan_inference_contract():

def test_result_accessors_render():
df, _, _ = _make_panel()
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
assert isinstance(res, SyntheticControlResults)
assert isinstance(res.summary(), str) and "Synthetic Control" in res.summary()
assert "att" in res.to_dict()
Expand All @@ -676,8 +748,10 @@ def test_result_accessors_render():

def test_inferred_post_matches_explicit():
df, years, T0 = _make_panel()
r_inf = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
r_exp = synthetic_control(df, "y", "treated", "unit", "year", post_periods=years[T0:], seed=0)
r_inf = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
r_exp = synthetic_control(
df, "y", "treated", "unit", "year", post_periods=years[T0:], seed=0, **_FAST
)
assert r_inf.post_periods == r_exp.post_periods == years[T0:]
assert abs(r_inf.att - r_exp.att) < 1e-12

Expand Down
Loading