diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py index 5ea986ff..4ec97d32 100644 --- a/tests/test_methodology_sdid.py +++ b/tests/test_methodology_sdid.py @@ -3076,13 +3076,17 @@ def test_baseline_parity_small_scale(self, variance_method): assert len(r.placebo_effects) == n0 @pytest.mark.parametrize("variance_method", ["placebo", "bootstrap", "jackknife"]) - def test_scale_equivariance(self, variance_method): + def test_scale_equivariance(self, variance_method, ci_params): """τ/a, SE/|a|, p-value, and n_successful must be invariant under (Y → a*Y + b) across ~15 orders of magnitude.""" + # Pure invariance check (baseline captured at runtime, not vs _BASELINE), so the + # absolute n_bootstrap is irrelevant: r0 and the scaled refits all use the same + # (ci_params-scaled in pure-Python, 200 under Rust) count, preserving equivariance. + nb = ci_params.bootstrap(200) data = _make_panel(seed=42) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - r0 = self._fit(data, variance_method) + r0 = self._fit(data, variance_method, n_bootstrap=nb) att0, se0, p0 = r0.att, r0.se, r0.p_value n0 = len(r0.placebo_effects) noise0 = r0.noise_level @@ -3092,7 +3096,7 @@ def test_scale_equivariance(self, variance_method): scaled = self._rescale(data, a, b) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - r = self._fit(scaled, variance_method) + r = self._fit(scaled, variance_method, n_bootstrap=nb) # Variance-method success count must be identical; divergence # would shift the empirical p-value floor 1/(n+1). assert len(r.placebo_effects) == n0, ( @@ -3172,13 +3176,15 @@ class TestPValueSemantics: null draws either and also use the analytical p-value. """ - def test_bootstrap_p_value_matches_analytical(self): + def test_bootstrap_p_value_matches_analytical(self, ci_params): """Bootstrap p-value must equal safe_inference(att, se)[1].""" + # Self-consistency check (reported p vs the analytical formula on the reported se) — + # independent of the bootstrap draw count, so ci_params scaling is safe. df = _make_panel(seed=42) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) r = SyntheticDiD( - variance_method="bootstrap", n_bootstrap=200, seed=1 + variance_method="bootstrap", n_bootstrap=ci_params.bootstrap(200), seed=1 ).fit( df, outcome="outcome", treatment="treated", unit="unit", time="period", @@ -3189,13 +3195,15 @@ def test_bootstrap_p_value_matches_analytical(self): f"bootstrap p_value={r.p_value} != analytical {expected_p}" ) - def test_placebo_p_value_uses_empirical_formula(self): + def test_placebo_p_value_uses_empirical_formula(self, ci_params): """Placebo p-value must equal max(mean(|draws| >= |att|), 1/(r+1)).""" + # Self-consistency check (reported p vs the empirical formula on the reported + # placebo_effects) — independent of the draw count, so ci_params scaling is safe. df = _make_panel(seed=42) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) r = SyntheticDiD( - variance_method="placebo", n_bootstrap=200, seed=1 + variance_method="placebo", n_bootstrap=ci_params.bootstrap(200), seed=1 ).fit( df, outcome="outcome", treatment="treated", unit="unit", time="period", diff --git a/tests/test_methodology_synthetic_control.py b/tests/test_methodology_synthetic_control.py index 9584f12e..00507bd8 100644 --- a/tests/test_methodology_synthetic_control.py +++ b/tests/test_methodology_synthetic_control.py @@ -47,6 +47,31 @@ ] +# --------------------------------------------------------------------------- +# Cheap optimizer settings for behavior tests (pure-Python CI speed) +# --------------------------------------------------------------------------- +# Behavior tests only need a VALID, cleanly-converged fit, not data-driven V quality. +# The production nested defaults (n_starts=4, inner_max_iter=10000, inner_min_decrease=1e-5) +# cost 30-150s per *pure-Python* fit because the inner Frank-Wolfe solve grinds its slow +# sublinear tail to hit the tight tolerance on every objective evaluation. Loosening the +# inner tolerance + a single start + a small outer cap gives a clean ~0.1s fit without +# changing what these tests assert. Pure-Python coverage of the production-default nested +# path (n_starts=4 with the _v_starts heuristic candidates + the tight inner_min_decrease=1e-5) +# is kept by the dedicated non-slow ``test_nested_production_defaults_smoke`` (a 2-donor panel +# whose inner FW simplex is ~1-D, so defaults stay <0.1s). The @slow Tier-2 Basque test +# additionally covers the defaults in the Rust matrix, and the Rust<->numpy Frank-Wolfe kernel +# equivalence is locked by tests/test_rust_backend.py::test_sc_weight_fw_matches_numpy. +# +# NB: inner_max_iter is deliberately LEFT AT DEFAULT here — the speedup comes from the +# looser tolerance letting FW terminate on *convergence* (not on an iteration cap), so the +# solve stays clean (no non-convergence warning). Do NOT fold inner_max_iter into _FAST or +# the inner-non-convergence warning starts firing spuriously. +_FAST = dict(n_starts=1, optimizer_options={"maxiter": 50}, inner_min_decrease=1e-3) +# Churn tests deliberately force inner non-convergence (inner_max_iter=1); KEEP that and only +# cap the outer optimizer so it does not iterate to maxiter on the flat penalty landscape. +_FAST_CHURN = dict(n_starts=1, optimizer_options={"maxiter": 5}) + + # --------------------------------------------------------------------------- # Synthetic panel builders (fast; no R needed) # --------------------------------------------------------------------------- @@ -197,8 +222,12 @@ def test_post_periods_canonicalized_and_gap_order_independent(): df, years, T0 = _make_panel() ordered = years[T0:] scrambled = list(reversed(ordered)) + [ordered[-1]] # unsorted + duplicate - r1 = synthetic_control(df, "y", "treated", "unit", "year", post_periods=ordered, seed=0) - r2 = synthetic_control(df, "y", "treated", "unit", "year", post_periods=scrambled, seed=0) + r1 = synthetic_control( + df, "y", "treated", "unit", "year", post_periods=ordered, seed=0, **_FAST + ) + r2 = synthetic_control( + df, "y", "treated", "unit", "year", post_periods=scrambled, seed=0, **_FAST + ) assert r1.post_periods == r2.post_periods == ordered assert abs(r1.att - r2.att) < 1e-12 gdf = r2.get_gap_df() @@ -214,7 +243,9 @@ def test_post_periods_canonicalized_and_gap_order_independent(): def test_donor_pool_restricts_donors(): df, years, T0 = _make_panel(n_donors=4) - res = synthetic_control(df, "y", "treated", "unit", "year", donor_pool=["d0", "d1"], seed=0) + res = synthetic_control( + df, "y", "treated", "unit", "year", donor_pool=["d0", "d1"], seed=0, **_FAST + ) assert res.n_donors == 2 assert set(res.get_weights_df()["unit"]) <= {"d0", "d1"} @@ -309,8 +340,19 @@ def test_outer_v_nonconvergence_warning(): # Outer V-search non-convergence must not be silent (optimizer capped at 1 iter). df, _, _ = _make_panel() with pytest.warns(UserWarning, match="Outer V-search"): + # maxiter=1 forces the OUTER non-convergence; n_starts=1 + a loose inner tolerance + # keep the (still-real) inner solves cheap. Loosening inner_min_decrease does not + # affect whether the outer optimizer hits its 1-iteration cap. synthetic_control( - df, "y", "treated", "unit", "year", seed=0, optimizer_options={"maxiter": 1} + df, + "y", + "treated", + "unit", + "year", + seed=0, + n_starts=1, + optimizer_options={"maxiter": 1}, + inner_min_decrease=1e-3, ) @@ -319,7 +361,9 @@ def test_inner_v_search_nonconvergence_warning(): # inner_max_iter=1 makes them truncate, and the estimator emits an aggregated warning. df, _, _ = _make_panel() with pytest.warns(UserWarning, match="during nested V selection"): - synthetic_control(df, "y", "treated", "unit", "year", seed=0, inner_max_iter=1) + synthetic_control( + df, "y", "treated", "unit", "year", seed=0, inner_max_iter=1, **_FAST_CHURN + ) def test_single_inner_nonconvergence_excluded_from_v_ranking(monkeypatch): @@ -348,7 +392,7 @@ def patched(X1s, X0s, v, max_iter, min_decrease): monkeypatch.setattr(sc, "_inner_solve_W", patched) with pytest.warns(UserWarning, match="during nested V selection"): - res = synthetic_control(df, "y", "treated", "unit", "year", seed=0) + res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) assert state["failed"] # the patch actually fired on an objective evaluation assert np.isfinite(res.att) @@ -361,11 +405,37 @@ def test_n_starts_one_runs(): # n_starts=1 uses only the uniform start (short-circuits the heuristic candidates) # and still produces a valid nested fit. df, _, _ = _make_panel() - res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, n_starts=1) + res = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + seed=0, + n_starts=1, + optimizer_options={"maxiter": 50}, + inner_min_decrease=1e-3, + ) assert np.isfinite(res.att) assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-6 +def test_nested_production_defaults_smoke(): + # Coverage anchor: exercise the FULL production-default nested path end-to-end in + # pure-Python — n_starts=4 (so the _v_starts heuristic candidates: inverse-variance, + # univariate-fit and Dirichlet starts are generated, which the n_starts=1 _FAST tests + # skip) and the tight inner_min_decrease=1e-5. A 2-donor panel keeps the inner + # Frank-Wolfe simplex effectively 1-D, so the default settings still run in <0.1s and + # this stays non-slow. The @slow Tier-2 Basque test covers the defaults only in the Rust + # matrix; this is the pure-Python complement. + df, _, _ = _make_panel(n_donors=2) + res = synthetic_control(df, "y", "treated", "unit", "year", seed=0) # production defaults + assert np.isfinite(res.att) + assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-6 + assert res.n_donors == 2 + assert res.mspe_v is not None # nested V was selected by minimizing pre-period MSPE + + def test_non_finite_outcome_rejected(): df, years, T0 = _make_panel() df = df.copy() @@ -378,7 +448,7 @@ def test_distinct_special_period_sets_not_duplicate(): # Same var/op, same endpoints + length, different intermediate period -> distinct # predictors, must NOT be rejected as duplicates. df, years, T0 = _make_panel(T=8, T0=6) - res = SyntheticControl(seed=0).fit( + res = SyntheticControl(seed=0, **_FAST).fit( df, "y", "treated", @@ -423,6 +493,7 @@ def test_duplicate_predictor_window_periods_deduped(): predictors=["y"], predictor_window=[years[0], years[0], years[1]], seed=0, + **_FAST, ) r_uniq = synthetic_control( df, @@ -433,6 +504,7 @@ def test_duplicate_predictor_window_periods_deduped(): predictors=["y"], predictor_window=[years[0], years[1]], seed=0, + **_FAST, ) assert abs(r_dup.att - r_uniq.att) < 1e-9 @@ -465,7 +537,7 @@ def test_poor_fit_warning(): rows.append({"unit": "treated", "year": yr, "y": 50 + 2.0 * t, "treated": int(t >= T0)}) df = pd.DataFrame(rows) with pytest.warns(UserWarning, match="Pre-treatment fit is poor"): - synthetic_control(df, "y", "treated", "unit", "year", seed=0) + synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) def test_poor_fit_warning_flat_treated_pre_path(): @@ -484,7 +556,7 @@ def test_poor_fit_warning_flat_treated_pre_path(): ) df = pd.DataFrame(rows) with pytest.warns(UserWarning, match="Pre-treatment fit is poor"): - synthetic_control(df, "y", "treated", "unit", "year", seed=0) + synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) # --------------------------------------------------------------------------- @@ -520,7 +592,7 @@ def test_duplicate_regular_predictor_rejected(): def test_inner_nonconvergence_warning(): df, _, _ = _make_panel(n_donors=4) with pytest.warns(UserWarning, match="did not converge"): - SyntheticControl(seed=0, v_method="nested", inner_max_iter=1).fit( + SyntheticControl(seed=0, v_method="nested", inner_max_iter=1, **_FAST_CHURN).fit( df, "y", "treated", "unit", "year" ) @@ -532,7 +604,7 @@ def test_inner_nonconvergence_warning(): def test_standardize_none_runs(): df, _, _ = _make_panel() - res = synthetic_control(df, "y", "treated", "unit", "year", standardize="none", seed=0) + res = synthetic_control(df, "y", "treated", "unit", "year", standardize="none", seed=0, **_FAST) assert res.standardize == "none" assert np.isfinite(res.att) @@ -652,7 +724,7 @@ def test_set_params_rolls_back_on_invalid(): def test_nan_inference_contract(): df, _, _ = _make_panel() - res = synthetic_control(df, "y", "treated", "unit", "year", seed=0) + res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) assert_nan_inference( {"se": res.se, "t_stat": res.t_stat, "p_value": res.p_value, "conf_int": res.conf_int} ) @@ -661,7 +733,7 @@ def test_nan_inference_contract(): def test_result_accessors_render(): df, _, _ = _make_panel() - res = synthetic_control(df, "y", "treated", "unit", "year", seed=0) + res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) assert isinstance(res, SyntheticControlResults) assert isinstance(res.summary(), str) and "Synthetic Control" in res.summary() assert "att" in res.to_dict() @@ -676,8 +748,10 @@ def test_result_accessors_render(): def test_inferred_post_matches_explicit(): df, years, T0 = _make_panel() - r_inf = synthetic_control(df, "y", "treated", "unit", "year", seed=0) - r_exp = synthetic_control(df, "y", "treated", "unit", "year", post_periods=years[T0:], seed=0) + r_inf = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) + r_exp = synthetic_control( + df, "y", "treated", "unit", "year", post_periods=years[T0:], seed=0, **_FAST + ) assert r_inf.post_periods == r_exp.post_periods == years[T0:] assert abs(r_inf.att - r_exp.att) < 1e-12