[Debian-med-packaging] Bug#1000752: skbio: autopkgtest fail with pandas 1.3: 3 test failures
Rebecca N. Palmer
rebecca_palmer at zoho.com
Sun Nov 28 13:12:46 GMT 2021
Package: python3-skbio
Version: 0.5.6-5
Severity: important
Control: block 999415 by -1
The 3 tests below fail with pandas 1.3 from experimental.
Full log:
https://ci.debian.net/data/autopkgtest/unstable/amd64/p/python-skbio/17019227/log.gz
=================================== FAILURES ===================================
___________ TestLoc.test_multiindex_complicated_axis_empty_selection ___________
self = <skbio.alignment.tests.test_tabular_msa.TestLoc
testMethod=test_multiindex_complicated_axis_empty_selection>
def test_multiindex_complicated_axis_empty_selection(self):
a = RNA("UUAG", metadata={0: 0}, positional_metadata={0: [1, 2,
3, 4]})
b = RNA("UAAG", metadata={1: 0}, positional_metadata={1: [1, 2,
3, 4]})
c = RNA("UAA-", metadata={2: 0}, positional_metadata={2: [1, 2,
3, 4]})
d = RNA("UA-G", metadata={3: 0}, positional_metadata={3: [1, 2,
3, 4]})
msa = TabularMSA([a, b, c, d], metadata={'x': 'y'},
positional_metadata={'c': ['a', 'b', 'c', 'd']},
index=[('a', 'x', 0), ('a', 'x', 1), ('a',
'y', 2),
('b', 'x', 0)])
> self.assertEqual(self.get(msa, (([False, True, False, True],
'x', 2), Ellipsis)),
TabularMSA([], metadata={'x': 'y'},
# TODO: Change for #1198
positional_metadata=None,
index=[]))
skbio/alignment/tests/test_tabular_msa.py:1390:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
skbio/alignment/tests/test_tabular_msa.py:1271: in get
return obj.loc[indexable]
skbio/alignment/_indexing.py:39: in __getitem__
return self._handle_both_axes(*indexable)
skbio/alignment/_indexing.py:53: in _handle_both_axes
r = self._slice_on_first_axis(self._obj, seq_index)
skbio/alignment/_indexing.py:77: in _slice_on_first_axis
return self._slice_sequences(obj, indexable)
skbio/alignment/_indexing.py:203: in _slice_sequences
return obj._slice_sequences_loc_(indexable)
skbio/alignment/_tabular_msa.py:1192: in _slice_sequences_loc_
new_seqs = self._seqs.loc[l]
/usr/lib/python3/dist-packages/pandas/core/indexing.py:925: in __getitem__
return self._getitem_tuple(key)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:1100: in _getitem_tuple
return self._getitem_lowerdim(tup)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:822: in _getitem_lowerdim
return self._getitem_nested_tuple(tup)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:892: in _getitem_nested_tuple
return self._getitem_axis(tup, axis=axis)
/usr/lib/python3/dist-packages/pandas/core/indexing.py:1157: in _getitem_axis
locs = labels.get_locs(key)
/usr/lib/python3/dist-packages/pandas/core/indexes/multi.py:3347: in get_locs
indexer = _update_indexer(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
idxr = Int64Index([2], dtype='int64')
indexer = Int64Index([1, 3], dtype='int64')
key = ([False, True, False, True], 'x', 2)
def _update_indexer(idxr: Index | None, indexer: Index | None, key)
-> Index:
if indexer is None:
indexer = Index(np.arange(n))
if idxr is None:
return indexer
indexer_intersection = indexer.intersection(idxr)
if indexer_intersection.empty and not idxr.empty and not
indexer.empty:
> raise KeyError(key)
E KeyError: ([False, True, False, True], 'x', 2)
/usr/lib/python3/dist-packages/pandas/core/indexes/multi.py:3296: KeyError
____________________ BIOENVTests.test_bioenv_vegan_example _____________________
self = <skbio.stats.distance.tests.test_bioenv.BIOENVTests
testMethod=test_bioenv_vegan_example>
def test_bioenv_vegan_example(self):
# The correlation coefficient in the first row of the
# results (rho=0.2516) is different from the correlation coefficient
# computed by vegan (rho=0.2513). This seems to occur due to
# differences in numerical precision when calculating the Euclidean
# distances, which affects the rank calculations in Spearman
# (specifically, dealing with ties). The ranked distances end up being
# slightly different between vegan and our implementation because some
# distances are treated as ties in vegan but treated as distinct values
# in our implementation. This explains the difference in rho values. I
# verified that using Pearson correlation instead of Spearman on the
# same distances yields *very* similar results. Thus, the discrepancy
# seems to stem from differences when computing ranks/ties.
obs = bioenv(self.dm_vegan, self.df_vegan)
> assert_data_frame_almost_equal(obs, self.exp_results_vegan)
skbio/stats/distance/tests/test_bioenv.py:149:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
skbio/util/_testing.py:304: in assert_data_frame_almost_equal
pdt.assert_frame_equal(left, right,
pandas/_libs/testing.pyx:53: in pandas._libs.testing.assert_almost_equal
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
> ???
E AssertionError: DataFrame.iloc[:, 1] (column name="correlation") are different
E
E DataFrame.iloc[:, 1] (column name="correlation") values are different (16.66667 %)
E [index]: [P, P, Al, P, Ca, Al, P, Ca, pH, Al, log(N), P, Ca, pH, Al, log(N), P, K, Ca, pH, Al]
E [left]: [0.25149020972268976, 0.40037784848960495, 0.40048058674961834, 0.3618749732452448, 0.3215524892624249, 0.2821814757209515]
E [right]: [0.2516302260961883, 0.4003778484896049, 0.4004805867496183, 0.3618749732452448, 0.3215524892624249, 0.2821814757209515]
pandas/_libs/testing.pyx:168: AssertionError
_____________________ GradientTests.test_weight_by_vector ______________________
self = <skbio.stats.tests.test_gradient.GradientTests
testMethod=test_weight_by_vector>
def test_weight_by_vector(self):
"""Correctly weights the vectors"""
trajectory = pd.DataFrame.from_dict({'s1': np.array([1]),
's2': np.array([2]),
's3': np.array([3]),
's4': np.array([4]),
's5': np.array([5]),
's6': np.array([6]),
's7': np.array([7]),
's8': np.array([8])},
orient='index')
trajectory.sort_values(by=0, inplace=True)
w_vector = pd.Series(np.array([1, 5, 8, 12, 45, 80, 85, 90]),
['s1', 's2', 's3', 's4',
's5', 's6', 's7', 's8']).astype(np.float64)
exp = pd.DataFrame.from_dict({'s1': np.array([1]),
's2': np.array([6.3571428571]),
's3': np.array([12.7142857142]),
's4': np.array([12.7142857142]),
's5': np.array([1.9264069264]),
's6': np.array([2.1795918367]),
's7': np.array([17.8]),
's8': np.array([20.3428571428])},
orient='index')
obs = _weight_by_vector(trajectory, w_vector)
assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index())
trajectory = pd.DataFrame.from_dict({'s1': np.array([1]),
's2': np.array([2]),
's3': np.array([3]),
's4': np.array([4]),
's5': np.array([5]),
's6': np.array([6]),
's7': np.array([7]),
's8': np.array([8])},
orient='index')
trajectory.sort_values(by=0, inplace=True)
w_vector = pd.Series(np.array([1, 2, 3, 4, 5, 6, 7, 8]),
['s1', 's2', 's3', 's4',
's5', 's6', 's7', 's8']).astype(np.float64)
exp = pd.DataFrame.from_dict({'s1': np.array([1.0]),
's2': np.array([2.0]),
's3': np.array([3.0]),
's4': np.array([4.0]),
's5': np.array([5.0]),
's6': np.array([6.0]),
's7': np.array([7.0]),
's8': np.array([8.0])
},
orient='index')
obs = _weight_by_vector(trajectory, w_vector)
> assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index())
skbio/stats/tests/test_gradient.py:268:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
left = 0
s1 1
s2 2
s3 3
s4 4
s5 5
s6 6
s7 7
s8 8
right = 0
s1 1.0
s2 2.0
s3 3.0
s4 4.0
s5 5.0
s6 6.0
s7 7.0
s8 8.0
@experimental(as_of="0.4.0")
def assert_data_frame_almost_equal(left, right):
"""Raise AssertionError if ``pd.DataFrame`` objects are not "almost equal".
Wrapper of ``pd.util.testing.assert_frame_equal``. Floating point values
are considered "almost equal" if they are within a threshold defined by
``assert_frame_equal``. This wrapper uses a number of
checks that are turned off by default in ``assert_frame_equal`` in order to
perform stricter comparisons (for example, ensuring the index and column
types are the same). It also does not consider empty ``pd.DataFrame``
objects equal if they have a different index.
Other notes:
* Index (row) and column ordering must be the same for objects to be equal.
* NaNs (``np.nan``) in the same locations are considered equal.
This is a helper function intended to be used in unit tests that need to
compare ``pd.DataFrame`` objects.
Parameters
----------
left, right : pd.DataFrame
``pd.DataFrame`` objects to compare.
Raises
------
AssertionError
If `left` and `right` are not "almost equal".
See Also
--------
pandas.util.testing.assert_frame_equal
"""
# pass all kwargs to ensure this function has consistent behavior even if
# `assert_frame_equal`'s defaults change
> pdt.assert_frame_equal(left, right,
check_dtype=True,
check_index_type=True,
check_column_type=True,
check_frame_type=True,
check_less_precise=False,
check_names=True,
by_blocks=False,
check_exact=False)
E AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="0") are different
E
E Attribute "dtype" are different
E [left]: int64
E [right]: float64
skbio/util/_testing.py:304: AssertionError
More information about the Debian-med-packaging mailing list