Improve test coverage for path solutions and export CQR_Ridge_path_sol

statmlben · statmlben · commit d8c875e7dcf3 · 2026-04-11T17:32:00.000+08:00
diff --git a/rehline/__init__.py b/rehline/__init__.py
@@ -12,8 +12,13 @@
 from ._internal import rehline_internal, rehline_result
 from ._loss import ReHLoss
 from ._mf_class import plqMF_Ridge
-from ._path_sol import plqERM_Ridge_path_sol
-from ._sklearn_mixin import plq_Ridge_Classifier, plq_Ridge_Regressor, plq_ElasticNet_Classifier, plq_ElasticNet_Regressor
+from ._path_sol import CQR_Ridge_path_sol, plqERM_Ridge_path_sol
+from ._sklearn_mixin import (
+    plq_ElasticNet_Classifier,
+    plq_ElasticNet_Regressor,
+    plq_Ridge_Classifier,
+    plq_Ridge_Regressor,
+)
 
 __all__ = (
     "_BaseReHLine",
@@ -23,6 +28,7 @@
     "CQR_Ridge",
     "plqERM_ElasticNet",
     "plqMF_Ridge",
+    "CQR_Ridge_path_sol",
     "plqERM_Ridge_path_sol",
     "plq_Ridge_Classifier",
     "plq_Ridge_Regressor",
diff --git a/tests/test_bugfixes.py b/tests/test_bugfixes.py
@@ -135,7 +135,7 @@ def test_ratings_are_rounded_to_half(self):
 class TestPathSolVerbose:
     """Verify that plqERM_Ridge_path_sol does not crash with verbose + no timing."""
 
-    def test_verbose_without_return_time(self):
+    def test_verbose_without_return_time(self, capsys):
         """verbose=1 + return_time=False must not raise NameError."""
         X, y = _make_classification_data(n=100, d=3)
         loss = {"name": "svm"}
@@ -158,8 +158,11 @@ def test_verbose_without_return_time(self):
         Cs_out, n_iters, loss_vals, l2_norms, coefs = result
         assert len(Cs_out) == 2
         assert len(n_iters) == 2
+        captured = capsys.readouterr()
+        assert "PLQ ERM Path Solution Results" in captured.out
+        assert "Time (s)" not in captured.out
 
-    def test_verbose_with_return_time(self):
+    def test_verbose_with_return_time(self, capsys):
         """verbose=1 + return_time=True should still work."""
         X, y = _make_classification_data(n=100, d=3)
         loss = {"name": "svm"}
@@ -177,6 +180,9 @@ def test_verbose_with_return_time(self):
         )
 
         assert len(result) == 6, f"Expected 6 return values, got {len(result)}"
+        captured = capsys.readouterr()
+        assert "PLQ ERM Path Solution Results" in captured.out
+        assert "Total Time" in captured.out
 
 
 # ===========================================================================
diff --git a/tests/test_multiclass.py b/tests/test_multiclass.py
@@ -183,7 +183,7 @@ def test_decision_function_shapes():
 
     # Binary
     y_bin = np.random.randint(0, 2, n_samples)
-    clf = plq_Ridge_Classifier(loss={"name": "svm"}, C=1.0, tol=1e-5)
+    clf = plq_Ridge_Classifier(loss={"name": "svm"}, C=1.0, tol=1e-5, max_iter=1_000_000)
     clf.fit(X, y_bin)
     assert clf.decision_function(X).shape == (n_samples,), "Binary decision_function should have shape (n_samples,)"
 
@@ -206,6 +206,7 @@ def test_decision_function_shapes():
         C=1.0,
         multi_class="ovo",
         tol=1e-5,
+        max_iter=1_000_000,
     )
     clf_ovo.fit(X, y_multi)
     assert clf_ovo.decision_function(X).shape == (n_samples, 6), (
diff --git a/tests/test_path_sol.py b/tests/test_path_sol.py
@@ -3,15 +3,15 @@
 import numpy as np
 from sklearn.datasets import make_hastie_10_2
 
-from rehline import plqERM_Ridge_path_sol
+from rehline import CQR_Ridge_path_sol, plqERM_Ridge_path_sol
 
 
 def test_path_sol_warm_start_shapes():
     """plqERM_Ridge_path_sol should return arrays with consistent shapes."""
     X, y = make_hastie_10_2(random_state=1)
     loss = {"name": "svm"}
     # Use a small number of C values so the test is fast
-    Cs = np.logspace(-3, 3, 10, base=2)
+    Cs = np.logspace(-3, 3, 7, base=2)
 
     (Cs_out, times, n_iters, loss_vals, l2_norms, coefs) = plqERM_Ridge_path_sol(
         X,
@@ -33,7 +33,7 @@ def test_path_sol_warm_start_shapes():
     assert len(times) == n_path, f"times length should be {n_path}, got {len(times)}"
     assert len(n_iters) == n_path, f"n_iters length should be {n_path}, got {len(n_iters)}"
     assert len(loss_vals) == n_path, f"loss_vals length should be {n_path}, got {len(loss_vals)}"
-    assert coefs.shape == (n_path, n_features), f"coefs shape should be ({n_path}, {n_features}), got {coefs.shape}"
+    assert coefs.shape == (n_features, n_path), f"coefs shape should be ({n_features}, {n_path}), got {coefs.shape}"
 
     # All timing values should be non-negative
     assert np.all(np.array(times) >= 0), "All timing values should be non-negative"
@@ -68,3 +68,108 @@ def test_path_sol_loss_range_with_larger_C():
     assert loss_vals[-1] <= loss_vals[0] * 1.05, (
         f"Loss at C=10 ({loss_vals[-1]:.2f}) should be ≤ 105% of loss at C=0.01 ({loss_vals[0]:.2f})"
     )
+
+
+def test_path_sol_generates_default_Cs_when_not_provided():
+    """plqERM_Ridge_path_sol should generate a sorted path when Cs is omitted."""
+    X, y = make_hastie_10_2(random_state=1)
+    loss = {"name": "svm"}
+
+    Cs_out, n_iters, loss_vals, l2_norms, coefs = plqERM_Ridge_path_sol(
+        X,
+        y,
+        loss=loss,
+        eps=1e-2,
+        n_Cs=4,
+        max_iter=100000,
+        tol=1e-3,
+        verbose=0,
+        warm_start=False,
+        constraint=None,
+        return_time=False,
+    )
+
+    assert len(Cs_out) == 4
+    assert np.all(np.diff(Cs_out) >= 0), "Generated Cs should be sorted in ascending order"
+    assert len(n_iters) == 4
+    assert len(loss_vals) == 4
+    assert len(l2_norms) == 4
+    assert coefs.shape == (X.shape[1], 4)
+
+
+def test_cqr_path_sol_shapes_without_times():
+    """CQR_Ridge_path_sol should return consistently shaped outputs without timing."""
+    np.random.seed(42)
+    X = np.random.randn(200, 2)
+    y = X @ np.array([1.0, 2.0]) + np.random.randn(200)
+    quantiles = [0.1, 0.5, 0.9]
+    Cs = np.array([0.1, 1.0])
+
+    Cs_out, models, coefs, intercepts = CQR_Ridge_path_sol(
+        X,
+        y,
+        quantiles=quantiles,
+        Cs=Cs,
+        max_iter=20000,
+        tol=1e-3,
+        verbose=0,
+        warm_start=False,
+        return_time=False,
+    )
+
+    assert np.array_equal(Cs_out, Cs)
+    assert len(models) == len(Cs)
+    assert coefs.shape == (len(Cs), len(quantiles), X.shape[1])
+    assert intercepts.shape == (len(Cs), len(quantiles))
+
+
+def test_cqr_path_sol_generates_default_Cs_with_times():
+    """CQR_Ridge_path_sol should generate default Cs and return timing info."""
+    np.random.seed(0)
+    X = np.random.randn(120, 3)
+    y = X @ np.array([1.0, -0.5, 2.0]) + np.random.randn(120)
+    quantiles = [0.25, 0.5, 0.75]
+
+    Cs_out, models, coefs, intercepts, fit_times = CQR_Ridge_path_sol(
+        X,
+        y,
+        quantiles=quantiles,
+        eps=1e-3,
+        n_Cs=3,
+        max_iter=20000,
+        tol=1e-3,
+        verbose=0,
+        warm_start=True,
+        return_time=True,
+    )
+
+    expected_Cs = np.power(10.0, np.linspace(np.log10(1e-3), np.log10(10), 3))
+
+    assert np.allclose(Cs_out, expected_Cs)
+    assert len(models) == 3
+    assert coefs.shape == (3, len(quantiles), X.shape[1])
+    assert intercepts.shape == (3, len(quantiles))
+    assert len(fit_times) == 3
+    assert np.all(np.array(fit_times) >= 0)
+
+
+def test_cqr_path_sol_verbose_reports_progress(capsys):
+    """CQR_Ridge_path_sol should print per-C progress when verbose is enabled."""
+    np.random.seed(1)
+    X = np.random.randn(80, 2)
+    y = X @ np.array([1.5, -0.5]) + np.random.randn(80)
+
+    CQR_Ridge_path_sol(
+        X,
+        y,
+        quantiles=[0.2, 0.8],
+        Cs=np.array([0.5]),
+        max_iter=20000,
+        tol=1e-3,
+        verbose=1,
+        warm_start=False,
+        return_time=True,
+    )
+
+    captured = capsys.readouterr()
+    assert "[OK] C=" in captured.out
diff --git a/tests/test_sklearn_mixin.py b/tests/test_sklearn_mixin.py
@@ -37,7 +37,7 @@ def test_classifier_pipeline_fits_and_predicts():
     pipe = Pipeline(
         [
             ("scaler", StandardScaler()),
-            ("clf", plq_Ridge_Classifier(loss={"name": "svm"}, C=1.0)),
+            ("clf", plq_Ridge_Classifier(loss={"name": "svm"}, C=1.0, tol=1e-3, max_iter=1_000_000)),
         ]
     )
     pipe.fit(X, y)
@@ -50,7 +50,16 @@ def test_classifier_pipeline_fits_and_predicts():
 
 def test_classifier_cross_val_score():
     """cross_val_score on plq_Ridge_Classifier pipeline should return reasonable scores."""
-    X, y = _clf_dataset()
+    X, y = make_classification(
+        n_samples=500,
+        n_features=10,
+        n_informative=5,
+        n_redundant=2,
+        n_classes=2,
+        class_sep=1.5,
+        flip_y=0.0,
+        random_state=42,
+    )
     pipe = Pipeline(
         [
             ("scaler", StandardScaler()),
@@ -72,6 +81,7 @@ def test_classifier_with_intercept_scaling():
         C=1.0,
         fit_intercept=True,
         intercept_scaling=1.0,
+        max_iter=1_000_000,
     )
     clf.fit(X_tr, y_tr)
     preds = clf.predict(X_te)
@@ -86,6 +96,7 @@ def test_classifier_with_nonneg_constraint():
         loss={"name": "svm"},
         C=1.0,
         constraint=[{"name": "nonnegative"}],
+        max_iter=1_000_000,
     )
     clf.fit(X, y)
     # Allow 1e-2 numerical slack — the solver may not satisfy the constraint
diff --git a/tests/test_svr.py b/tests/test_svr.py
@@ -33,7 +33,7 @@ def test_plqERM_Ridge_svr_matches_sklearn():
     reg_skl.fit(X, y)
     coef_skl = reg_skl.coef_.flatten()
 
-    reg_reh = plqERM_Ridge(loss={"name": "svr", "epsilon": epsilon}, C=C)
+    reg_reh = plqERM_Ridge(loss={"name": "svr", "epsilon": epsilon}, C=C, tol=1e-4, max_iter=100000)
     reg_reh.fit(X=X, y=y)
     coef_reh = reg_reh.coef_.flatten()
 
@@ -53,7 +53,7 @@ def test_ReHLine_manual_svr_params_match_builtin():
     n = X.shape[0]
 
     # Built-in loss
-    reg_builtin = plqERM_Ridge(loss={"name": "svr", "epsilon": epsilon}, C=C)
+    reg_builtin = plqERM_Ridge(loss={"name": "svr", "epsilon": epsilon}, C=C, tol=1e-6, max_iter=1_000_000)
     reg_builtin.fit(X=X, y=y)
     coef_builtin = reg_builtin.coef_.flatten()
 
@@ -65,7 +65,7 @@ def test_ReHLine_manual_svr_params_match_builtin():
     V[1] = C * (y - epsilon)
 
     # When U/V are pre-scaled by C, use C=1.0 to avoid double-counting
-    reg_manual = ReHLine(C=1.0)
+    reg_manual = ReHLine(C=1.0, tol=1e-6, max_iter=1_000_000)
     reg_manual._U, reg_manual._V = U, V
     reg_manual.fit(X=X)
     coef_manual = reg_manual.coef_.flatten()