Bug#1029701: scikit-learn: tests fail with scipy 1.10

Drew Parsons dparsons at debian.org
Thu Jan 26 12:06:24 GMT 2023


Package: scikit-learn
Version: 1.1.2+dfsg-92
Severity: normal

scipy 1.10 is now available in experimental.
scikit-learn fails debci tests using it.

We are considering uploading scipy 1.10 to unstable in order to
include it in the forthcoming stable release.  If we proceed with
that, then this bug will become Severity: serious.
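
For reference, the failure should be reproducible directly against the
experimental scipy with pytest (assuming python3-pytest and scipy 1.10
from experimental are installed; the test path is taken from the log
below):

    python3 -m pytest -k test_grid_search_failing_classifier \
        /usr/lib/python3/dist-packages/sklearn/model_selection/tests/test_search.py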

test_search.py is affected.
A sample from the failing test log follows:

_____________________ test_grid_search_failing_classifier ______________________

    def test_grid_search_failing_classifier():
        # GridSearchCV with on_error != 'raise'
        # Ensures that a warning is raised and score reset where appropriate.
    
        X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    
        clf = FailingClassifier()
    
        # refit=False because we only want to check that errors caused by fits
        # to individual folds will be caught and warnings raised instead. If
        # refit was done, then an exception would be raised on refit and not
        # caught by grid_search (expected behavior), and this would cause an
        # error in this test.
        gs = GridSearchCV(
            clf,
            [{"parameter": [0, 1, 2]}],
            scoring="accuracy",
            refit=False,
            error_score=0.0,
        )
    
        warning_message = re.compile(
            "5 fits failed.+total of 15.+The score on these"
            r" train-test partitions for these parameters will be set to 0\.0.+"
            "5 fits failed with the following error.+ValueError.+Failing classifier failed"
            " as required",
            flags=re.DOTALL,
        )
        with pytest.warns(FitFailedWarning, match=warning_message):
            gs.fit(X, y)
        n_candidates = len(gs.cv_results_["params"])
    
        # Ensure that grid scores were set to zero as required for those fits
        # that are expected to fail.
        def get_cand_scores(i):
            return np.array(
                list(
                    gs.cv_results_["split%d_test_score" % s][i] for s in range(gs.n_splits_)
                )
            )
    
        assert all(
            (
                np.all(get_cand_scores(cand_i) == 0.0)
                for cand_i in range(n_candidates)
                if gs.cv_results_["param_parameter"][cand_i]
                == FailingClassifier.FAILING_PARAMETER
            )
        )
    
        gs = GridSearchCV(
            clf,
            [{"parameter": [0, 1, 2]}],
            scoring="accuracy",
            refit=False,
            error_score=float("nan"),
        )
        warning_message = re.compile(
            "5 fits failed.+total of 15.+The score on these"
            r" train-test partitions for these parameters will be set to nan.+"
            "5 fits failed with the following error.+ValueError.+Failing classifier failed"
            " as required",
            flags=re.DOTALL,
        )
        with pytest.warns(FitFailedWarning, match=warning_message):
            gs.fit(X, y)
        n_candidates = len(gs.cv_results_["params"])
        assert all(
            np.all(np.isnan(get_cand_scores(cand_i)))
            for cand_i in range(n_candidates)
            if gs.cv_results_["param_parameter"][cand_i]
            == FailingClassifier.FAILING_PARAMETER
        )
    
        ranks = gs.cv_results_["rank_test_score"]
    
        # Check that succeeded estimators have lower ranks
        assert ranks[0] <= 2 and ranks[1] <= 2
        # Check that failed estimator has the highest rank
>       assert ranks[clf.FAILING_PARAMETER] == 3
E       assert -2147483648 == 3

/usr/lib/python3/dist-packages/sklearn/model_selection/tests/test_search.py:1630: AssertionError
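
The -2147483648 in the failing assertion is INT32_MIN, i.e. what numpy
produces when casting nan to int32. Our reading (an assumption, not yet
confirmed against the scikit-learn source) is that this comes from a
behaviour change in scipy.stats.rankdata: scipy 1.10 added a nan_policy
parameter whose default, 'propagate', returns an all-nan result when
the input contains nan, whereas older scipy ranked nan last.
scikit-learn computes rank_test_score from the (possibly nan) mean
scores with rankdata and then casts to int32. A minimal sketch of the
suspected interaction (the scores below are illustrative, not taken
from the test):

    import numpy as np
    from scipy.stats import rankdata

    # Stand-in for mean test scores where one candidate failed with
    # error_score=float("nan"); values are made up for illustration.
    mean_scores = np.array([0.8, 0.7, np.nan])

    # scikit-learn ranks candidates roughly like this (scores negated
    # so that higher scores receive lower, i.e. better, ranks):
    ranks = rankdata(-mean_scores, method="min")
    # scipy < 1.10:  nan sorts last, so ranks == [1., 2., 3.]
    # scipy >= 1.10: nan_policy defaults to 'propagate', so the whole
    #                result becomes nan: ranks == [nan, nan, nan]

    print(np.asarray(ranks, dtype=np.int32))
    # Casting nan to int32 yields INT32_MIN (-2147483648) on most
    # platforms, matching the value in the assertion failure above.

If that diagnosis is right, the fix belongs in scikit-learn's rank
computation (handling nan scores explicitly before calling rankdata)
rather than in scipy.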


