[Debian-med-packaging] Bug#1000752: skbio: autopkgtest fail with pandas 1.3: 3 test failures

Sun Nov 28 13:12:46 GMT 2021

Package: python3-skbio
Version: 0.5.6-5
Severity: important
Control: block 999415 by -1

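The 3 tests below fail with pandas 1.3 from experimental:
TestLoc.test_multiindex_complicated_axis_empty_selection (KeyError raised
from pandas' MultiIndex.get_locs), BIOENVTests.test_bioenv_vegan_example
(one "correlation" value differs from the expected result), and
GradientTests.test_weight_by_vector (result column has dtype int64 where
float64 is expected).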

Full log: 
https://ci.debian.net/data/autopkgtest/unstable/amd64/p/python-skbio/17019227/log.gz

=================================== FAILURES ===================================
___________ TestLoc.test_multiindex_complicated_axis_empty_selection ___________

self = <skbio.alignment.tests.test_tabular_msa.TestLoc testMethod=test_multiindex_complicated_axis_empty_selection>

     def test_multiindex_complicated_axis_empty_selection(self):
         a = RNA("UUAG", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]})
         b = RNA("UAAG", metadata={1: 0}, positional_metadata={1: [1, 2, 3, 4]})
         c = RNA("UAA-", metadata={2: 0}, positional_metadata={2: [1, 2, 3, 4]})
         d = RNA("UA-G", metadata={3: 0}, positional_metadata={3: [1, 2, 3, 4]})
         msa = TabularMSA([a, b, c, d], metadata={'x': 'y'},
                          positional_metadata={'c': ['a', 'b', 'c', 'd']},
                          index=[('a', 'x', 0), ('a', 'x', 1), ('a', 'y', 2),
                                 ('b', 'x', 0)])

 >       self.assertEqual(self.get(msa, (([False, True, False, True],
                                          'x', 2), Ellipsis)),
                          TabularMSA([], metadata={'x': 'y'},
                                     # TODO: Change for #1198
                                     positional_metadata=None,
                                     index=[]))

skbio/alignment/tests/test_tabular_msa.py:1390:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _
skbio/alignment/tests/test_tabular_msa.py:1271: in get
     return obj.loc[indexable]
skbio/alignment/_indexing.py:39: in __getitem__
     return self._handle_both_axes(*indexable)
skbio/alignment/_indexing.py:53: in _handle_both_axes
     r = self._slice_on_first_axis(self._obj, seq_index)
skbio/alignment/_indexing.py:77: in _slice_on_first_axis
     return self._slice_sequences(obj, indexable)
skbio/alignment/_indexing.py:203: in _slice_sequences
     return obj._slice_sequences_loc_(indexable)
skbio/alignment/_tabular_msa.py:1192: in _slice_sequences_loc_
     new_seqs = self._seqs.loc[l]
/usr/lib/python3/dist-packages/pandas/core/indexing.py:925: in __getitem__
     return self._getitem_tuple(key)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:1100: in _getitem_tuple
     return self._getitem_lowerdim(tup)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:822: in _getitem_lowerdim
     return self._getitem_nested_tuple(tup)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:892: in _getitem_nested_tuple
     return self._getitem_axis(tup, axis=axis)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:1157: in _getitem_axis
     locs = labels.get_locs(key)
/usr/lib/python3/dist-packages/pandas/core/indexes/multi.py:3347: in get_locs
     indexer = _update_indexer(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

idxr = Int64Index([2], dtype='int64')
indexer = Int64Index([1, 3], dtype='int64')
key = ([False, True, False, True], 'x', 2)

     def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index:
         if indexer is None:
             indexer = Index(np.arange(n))
         if idxr is None:
             return indexer
         indexer_intersection = indexer.intersection(idxr)
         if indexer_intersection.empty and not idxr.empty and not indexer.empty:
 >           raise KeyError(key)
E           KeyError: ([False, True, False, True], 'x', 2)

/usr/lib/python3/dist-packages/pandas/core/indexes/multi.py:3296: KeyError
____________________ BIOENVTests.test_bioenv_vegan_example _____________________

self = <skbio.stats.distance.tests.test_bioenv.BIOENVTests testMethod=test_bioenv_vegan_example>

     def test_bioenv_vegan_example(self):
         # The correlation coefficient in the first row of the
         # results (rho=0.2516) is different from the correlation coefficient
         # computed by vegan (rho=0.2513). This seems to occur due to
         # differences in numerical precision when calculating the Euclidean
         # distances, which affects the rank calculations in Spearman
         # (specifically, dealing with ties). The ranked distances end up being
         # slightly different between vegan and our implementation because some
         # distances are treated as ties in vegan but treated as distinct values
         # in our implementation. This explains the difference in rho values. I
         # verified that using Pearson correlation instead of Spearman on the
         # same distances yields *very* similar results. Thus, the discrepancy
         # seems to stem from differences when computing ranks/ties.
         obs = bioenv(self.dm_vegan, self.df_vegan)
 >       assert_data_frame_almost_equal(obs, self.exp_results_vegan)

skbio/stats/distance/tests/test_bioenv.py:149:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _
skbio/util/_testing.py:304: in assert_data_frame_almost_equal
     pdt.assert_frame_equal(left, right,
pandas/_libs/testing.pyx:53: in pandas._libs.testing.assert_almost_equal
     ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

 >   ???
E   AssertionError: DataFrame.iloc[:, 1] (column name="correlation") are different
E
E   DataFrame.iloc[:, 1] (column name="correlation") values are different (16.66667 %)
E   [index]: [P, P, Al, P, Ca, Al, P, Ca, pH, Al, log(N), P, Ca, pH, Al, log(N), P, K, Ca, pH, Al]
E   [left]:  [0.25149020972268976, 0.40037784848960495, 0.40048058674961834, 0.3618749732452448, 0.3215524892624249, 0.2821814757209515]
E   [right]: [0.2516302260961883, 0.4003778484896049, 0.4004805867496183, 0.3618749732452448, 0.3215524892624249, 0.2821814757209515]

pandas/_libs/testing.pyx:168: AssertionError
_____________________ GradientTests.test_weight_by_vector ______________________

self = <skbio.stats.tests.test_gradient.GradientTests testMethod=test_weight_by_vector>

     def test_weight_by_vector(self):
         """Correctly weights the vectors"""
         trajectory = pd.DataFrame.from_dict({'s1': np.array([1]),
                                              's2': np.array([2]),
                                              's3': np.array([3]),
                                              's4': np.array([4]),
                                              's5': np.array([5]),
                                              's6': np.array([6]),
                                              's7': np.array([7]),
                                              's8': np.array([8])},
                                             orient='index')
         trajectory.sort_values(by=0, inplace=True)
         w_vector = pd.Series(np.array([1, 5, 8, 12, 45, 80, 85, 90]),
                              ['s1', 's2', 's3', 's4',
                               's5', 's6', 's7', 's8']).astype(np.float64)
         exp = pd.DataFrame.from_dict({'s1': np.array([1]),
                                       's2': np.array([6.3571428571]),
                                       's3': np.array([12.7142857142]),
                                       's4': np.array([12.7142857142]),
                                       's5': np.array([1.9264069264]),
                                       's6': np.array([2.1795918367]),
                                       's7': np.array([17.8]),
                                       's8': np.array([20.3428571428])},
                                      orient='index')
         obs = _weight_by_vector(trajectory, w_vector)
         assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index())

         trajectory = pd.DataFrame.from_dict({'s1': np.array([1]),
                                              's2': np.array([2]),
                                              's3': np.array([3]),
                                              's4': np.array([4]),
                                              's5': np.array([5]),
                                              's6': np.array([6]),
                                              's7': np.array([7]),
                                              's8': np.array([8])},
                                             orient='index')
         trajectory.sort_values(by=0, inplace=True)
         w_vector = pd.Series(np.array([1, 2, 3, 4, 5, 6, 7, 8]),
                              ['s1', 's2', 's3', 's4',
                               's5', 's6', 's7', 's8']).astype(np.float64)
         exp = pd.DataFrame.from_dict({'s1': np.array([1.0]),
                                       's2': np.array([2.0]),
                                       's3': np.array([3.0]),
                                       's4': np.array([4.0]),
                                       's5': np.array([5.0]),
                                       's6': np.array([6.0]),
                                       's7': np.array([7.0]),
                                       's8': np.array([8.0])
                                       },
                                      orient='index')
         obs = _weight_by_vector(trajectory, w_vector)
 >       assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index())

skbio/stats/tests/test_gradient.py:268:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

left =     0
s1  1
s2  2
s3  3
s4  4
s5  5
s6  6
s7  7
s8  8
right =       0
s1  1.0
s2  2.0
s3  3.0
s4  4.0
s5  5.0
s6  6.0
s7  7.0
s8  8.0

     @experimental(as_of="0.4.0")
     def assert_data_frame_almost_equal(left, right):
         """Raise AssertionError if ``pd.DataFrame`` objects are not "almost equal".

         Wrapper of ``pd.util.testing.assert_frame_equal``. Floating point values
         are considered "almost equal" if they are within a threshold defined by
         ``assert_frame_equal``. This wrapper uses a number of
         checks that are turned off by default in ``assert_frame_equal`` in order to
         perform stricter comparisons (for example, ensuring the index and column
         types are the same). It also does not consider empty ``pd.DataFrame``
         objects equal if they have a different index.

         Other notes:

         * Index (row) and column ordering must be the same for objects to be equal.
         * NaNs (``np.nan``) in the same locations are considered equal.

         This is a helper function intended to be used in unit tests that need to
         compare ``pd.DataFrame`` objects.

         Parameters
         ----------
         left, right : pd.DataFrame
             ``pd.DataFrame`` objects to compare.

         Raises
         ------
         AssertionError
             If `left` and `right` are not "almost equal".

         See Also
         --------
         pandas.util.testing.assert_frame_equal

         """
         # pass all kwargs to ensure this function has consistent behavior even if
         # `assert_frame_equal`'s defaults change
 >       pdt.assert_frame_equal(left, right,
                                check_dtype=True,
                                check_index_type=True,
                                check_column_type=True,
                                check_frame_type=True,
                                check_less_precise=False,
                                check_names=True,
                                by_blocks=False,
                                check_exact=False)
E       AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="0") are different
E
E       Attribute "dtype" are different
E       [left]:  int64
E       [right]: float64

skbio/util/_testing.py:304: AssertionError