Bug#1023978: scikit-learn: FTBFS with Python 3.11 as a supported version

Sun Nov 13 12:26:18 GMT 2022

Source: scikit-learn
Version: 1.1.2+dfsg-7
Severity: serious
Tags: ftbfs
User: debian-python at lists.debian.org
Usertags: python3.11

Hi Maintainer

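scikit-learn fails to build from source (FTBFS) with Python 3.11 as a
supported version.  I've copied what I hope is the relevant part of the
log below.  In short, setup_module() in sklearn/datasets/tests/test_lfw.py
passes a NumPy array to random.Random.choice(), and Python 3.11's
implementation evaluates "if not seq", which raises ValueError for an
array with more than one element.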

Regards
Graham

==================================== ERRORS ====================================
_________________ ERROR at setup of test_load_empty_lfw_people _________________

    def setup_module():
        """Test fixture run once and common to all tests of this module"""
        Image = pytest.importorskip("PIL.Image")

        global SCIKIT_LEARN_DATA, SCIKIT_LEARN_EMPTY_DATA, LFW_HOME

        SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
        LFW_HOME = os.path.join(SCIKIT_LEARN_DATA, "lfw_home")

        SCIKIT_LEARN_EMPTY_DATA = tempfile.mkdtemp(prefix="scikit_learn_empty_test_")

        if not os.path.exists(LFW_HOME):
            os.makedirs(LFW_HOME)

        random_state = random.Random(42)
        np_rng = np.random.RandomState(42)

        # generate some random jpeg files for each person
        counts = {}
        for name in FAKE_NAMES:
            folder_name = os.path.join(LFW_HOME, "lfw_funneled", name)
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

            n_faces = np_rng.randint(1, 5)
            counts[name] = n_faces
            for i in range(n_faces):
                file_path = os.path.join(folder_name, name + "_%04d.jpg" % i)
                uniface = np_rng.randint(0, 255, size=(250, 250, 3))
                img = Image.fromarray(uniface.astype(np.uint8))
                img.save(file_path)

        # add some random file pollution to test robustness
        with open(os.path.join(LFW_HOME, "lfw_funneled", ".test.swp"), "wb") as f:
            f.write(b"Text file to be ignored by the dataset loader.")

        # generate some pairing metadata files using the same format as LFW
        with open(os.path.join(LFW_HOME, "pairsDevTrain.txt"), "wb") as f:
            f.write(b"10\n")
            more_than_two = [name for name, count in counts.items() if count >= 2]
            for i in range(5):
                name = random_state.choice(more_than_two)
                first, second = random_state.sample(range(counts[name]), 2)
                f.write(("%s\t%d\t%d\n" % (name, first, second)).encode())

            for i in range(5):
                first_name, second_name = random_state.sample(FAKE_NAMES, 2)
>               first_index = random_state.choice(np.arange(counts[first_name]))

sklearn/datasets/tests/test_lfw.py:87:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <random.Random object at 0x6d1b4e0>, seq = array([0, 1, 2])

    def choice(self, seq):
        """Choose a random element from a non-empty sequence."""
>       if not seq:
E       ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

/usr/lib/python3.11/random.py:369: ValueError
_________________ ERROR at setup of test_load_fake_lfw_people __________________

    def setup_module():
        """Test fixture run once and common to all tests of this module"""
        Image = pytest.importorskip("PIL.Image")

        global SCIKIT_LEARN_DATA, SCIKIT_LEARN_EMPTY_DATA, LFW_HOME

        SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
        LFW_HOME = os.path.join(SCIKIT_LEARN_DATA, "lfw_home")

        SCIKIT_LEARN_EMPTY_DATA = tempfile.mkdtemp(prefix="scikit_learn_empty_test_")

        if not os.path.exists(LFW_HOME):
            os.makedirs(LFW_HOME)

        random_state = random.Random(42)
        np_rng = np.random.RandomState(42)

        # generate some random jpeg files for each person
        counts = {}
        for name in FAKE_NAMES:
            folder_name = os.path.join(LFW_HOME, "lfw_funneled", name)
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

            n_faces = np_rng.randint(1, 5)
            counts[name] = n_faces
            for i in range(n_faces):
                file_path = os.path.join(folder_name, name + "_%04d.jpg" % i)
                uniface = np_rng.randint(0, 255, size=(250, 250, 3))
                img = Image.fromarray(uniface.astype(np.uint8))
                img.save(file_path)

        # add some random file pollution to test robustness
        with open(os.path.join(LFW_HOME, "lfw_funneled", ".test.swp"), "wb") as f:
            f.write(b"Text file to be ignored by the dataset loader.")

        # generate some pairing metadata files using the same format as LFW
        with open(os.path.join(LFW_HOME, "pairsDevTrain.txt"), "wb") as f:
            f.write(b"10\n")
            more_than_two = [name for name, count in counts.items() if count >= 2]
            for i in range(5):
                name = random_state.choice(more_than_two)
                first, second = random_state.sample(range(counts[name]), 2)
                f.write(("%s\t%d\t%d\n" % (name, first, second)).encode())

            for i in range(5):
                first_name, second_name = random_state.sample(FAKE_NAMES, 2)
>               first_index = random_state.choice(np.arange(counts[first_name]))

sklearn/datasets/tests/test_lfw.py:87:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <random.Random object at 0x6d1b4e0>, seq = array([0, 1, 2])

    def choice(self, seq):
        """Choose a random element from a non-empty sequence."""
>       if not seq:
E       ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

/usr/lib/python3.11/random.py:369: ValueError
_________ ERROR at setup of test_load_fake_lfw_people_too_restrictive __________

    def setup_module():
        """Test fixture run once and common to all tests of this module"""
        Image = pytest.importorskip("PIL.Image")

        global SCIKIT_LEARN_DATA, SCIKIT_LEARN_EMPTY_DATA, LFW_HOME

        SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
        LFW_HOME = os.path.join(SCIKIT_LEARN_DATA, "lfw_home")

        SCIKIT_LEARN_EMPTY_DATA = tempfile.mkdtemp(prefix="scikit_learn_empty_test_")

        if not os.path.exists(LFW_HOME):
            os.makedirs(LFW_HOME)

        random_state = random.Random(42)
        np_rng = np.random.RandomState(42)

        # generate some random jpeg files for each person
        counts = {}
        for name in FAKE_NAMES:
            folder_name = os.path.join(LFW_HOME, "lfw_funneled", name)
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

            n_faces = np_rng.randint(1, 5)
            counts[name] = n_faces
            for i in range(n_faces):
                file_path = os.path.join(folder_name, name + "_%04d.jpg" % i)
                uniface = np_rng.randint(0, 255, size=(250, 250, 3))
                img = Image.fromarray(uniface.astype(np.uint8))
                img.save(file_path)

        # add some random file pollution to test robustness
        with open(os.path.join(LFW_HOME, "lfw_funneled", ".test.swp"), "wb") as f:
            f.write(b"Text file to be ignored by the dataset loader.")

        # generate some pairing metadata files using the same format as LFW
        with open(os.path.join(LFW_HOME, "pairsDevTrain.txt"), "wb") as f:
            f.write(b"10\n")
            more_than_two = [name for name, count in counts.items() if count >= 2]
            for i in range(5):
                name = random_state.choice(more_than_two)
                first, second = random_state.sample(range(counts[name]), 2)
                f.write(("%s\t%d\t%d\n" % (name, first, second)).encode())

            for i in range(5):
                first_name, second_name = random_state.sample(FAKE_NAMES, 2)
>               first_index = random_state.choice(np.arange(counts[first_name]))

sklearn/datasets/tests/test_lfw.py:87:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <random.Random object at 0x6d1b4e0>, seq = array([0, 1, 2])

    def choice(self, seq):
        """Choose a random element from a non-empty sequence."""
>       if not seq:
E       ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

/usr/lib/python3.11/random.py:369: ValueError
_________________ ERROR at setup of test_load_empty_lfw_pairs __________________

    def setup_module():
        """Test fixture run once and common to all tests of this module"""
        Image = pytest.importorskip("PIL.Image")

        global SCIKIT_LEARN_DATA, SCIKIT_LEARN_EMPTY_DATA, LFW_HOME

        SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
        LFW_HOME = os.path.join(SCIKIT_LEARN_DATA, "lfw_home")

        SCIKIT_LEARN_EMPTY_DATA = tempfile.mkdtemp(prefix="scikit_learn_empty_test_")

        if not os.path.exists(LFW_HOME):
            os.makedirs(LFW_HOME)

        random_state = random.Random(42)
        np_rng = np.random.RandomState(42)

        # generate some random jpeg files for each person
        counts = {}
        for name in FAKE_NAMES:
            folder_name = os.path.join(LFW_HOME, "lfw_funneled", name)
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

            n_faces = np_rng.randint(1, 5)
            counts[name] = n_faces
            for i in range(n_faces):
                file_path = os.path.join(folder_name, name + "_%04d.jpg" % i)
                uniface = np_rng.randint(0, 255, size=(250, 250, 3))
                img = Image.fromarray(uniface.astype(np.uint8))
                img.save(file_path)

        # add some random file pollution to test robustness
        with open(os.path.join(LFW_HOME, "lfw_funneled", ".test.swp"), "wb") as f:
            f.write(b"Text file to be ignored by the dataset loader.")

        # generate some pairing metadata files using the same format as LFW
        with open(os.path.join(LFW_HOME, "pairsDevTrain.txt"), "wb") as f:
            f.write(b"10\n")
            more_than_two = [name for name, count in counts.items() if count >= 2]
            for i in range(5):
                name = random_state.choice(more_than_two)
                first, second = random_state.sample(range(counts[name]), 2)
                f.write(("%s\t%d\t%d\n" % (name, first, second)).encode())

            for i in range(5):
                first_name, second_name = random_state.sample(FAKE_NAMES, 2)
>               first_index = random_state.choice(np.arange(counts[first_name]))

sklearn/datasets/tests/test_lfw.py:87:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <random.Random object at 0x6d1b4e0>, seq = array([0, 1, 2])

    def choice(self, seq):
        """Choose a random element from a non-empty sequence."""
>       if not seq:
E       ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

/usr/lib/python3.11/random.py:369: ValueError
__________________ ERROR at setup of test_load_fake_lfw_pairs __________________

    def setup_module():
        """Test fixture run once and common to all tests of this module"""
        Image = pytest.importorskip("PIL.Image")

        global SCIKIT_LEARN_DATA, SCIKIT_LEARN_EMPTY_DATA, LFW_HOME

        SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
        LFW_HOME = os.path.join(SCIKIT_LEARN_DATA, "lfw_home")

        SCIKIT_LEARN_EMPTY_DATA = tempfile.mkdtemp(prefix="scikit_learn_empty_test_")

        if not os.path.exists(LFW_HOME):
            os.makedirs(LFW_HOME)

        random_state = random.Random(42)
        np_rng = np.random.RandomState(42)

        # generate some random jpeg files for each person
        counts = {}
        for name in FAKE_NAMES:
            folder_name = os.path.join(LFW_HOME, "lfw_funneled", name)
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

            n_faces = np_rng.randint(1, 5)
            counts[name] = n_faces
            for i in range(n_faces):
                file_path = os.path.join(folder_name, name + "_%04d.jpg" % i)
                uniface = np_rng.randint(0, 255, size=(250, 250, 3))
                img = Image.fromarray(uniface.astype(np.uint8))
                img.save(file_path)

        # add some random file pollution to test robustness
        with open(os.path.join(LFW_HOME, "lfw_funneled", ".test.swp"), "wb") as f:
            f.write(b"Text file to be ignored by the dataset loader.")

        # generate some pairing metadata files using the same format as LFW
        with open(os.path.join(LFW_HOME, "pairsDevTrain.txt"), "wb") as f:
            f.write(b"10\n")
            more_than_two = [name for name, count in counts.items() if count >= 2]
            for i in range(5):
                name = random_state.choice(more_than_two)
                first, second = random_state.sample(range(counts[name]), 2)
                f.write(("%s\t%d\t%d\n" % (name, first, second)).encode())

            for i in range(5):
                first_name, second_name = random_state.sample(FAKE_NAMES, 2)
>               first_index = random_state.choice(np.arange(counts[first_name]))

sklearn/datasets/tests/test_lfw.py:87:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <random.Random object at 0x6d1b4e0>, seq = array([0, 1, 2])

    def choice(self, seq):
        """Choose a random element from a non-empty sequence."""
>       if not seq:
E       ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

/usr/lib/python3.11/random.py:369: ValueError