[Debian-med-packaging] Bug#950932: q2-types: FTBFS with pandas 1.0: test failures
Rebecca N. Palmer
rebecca_palmer at zoho.com
Sat Feb 8 14:18:11 GMT 2020
Source: q2-types
Version: 2019.10.0-1
Control: block 950430 by -1
With pandas 1.0 from experimental, q2-types fails to build because of the following test failures:
=================================== FAILURES ===================================
____________ TestTaxonomyFormatsToDataFrame.test_duplicate_columns _____________
self =
<q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame
testMethod=test_duplicate_columns>
def test_duplicate_columns(self):
with self.assertRaisesRegex(ValueError, 'duplicated: Column1'):
_taxonomy_formats_to_dataframe(
self.get_data_path(os.path.join(
> 'taxonomy', 'duplicate-columns.tsv')))
q2_types/feature_data/tests/test_transformer.py:355:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def _taxonomy_formats_to_dataframe(filepath, has_header=None):
"""Read any of the three taxonomy formats into a dataframe.
Parameters
----------
filepath : str
The taxonomy-formatted file to be read.
has_header : bool, optional
If `None`, autodetect the header: only `Feature
ID<tab>Taxon` is
recognized, optionally followed by other columns. If
`True`, the file
must have the expected header described above otherwise an
error is
raised. If `False`, the file is read without assuming a header.
Returns
-------
pd.DataFrame
Dataframe containing parsed contents of the taxonomy file. The
dataframe will have its index name set to `Feature ID` and
its first
column will be `Taxon`, followed by any other columns in
the input
file.
"""
# Using `dtype=object` and `set_index()` to avoid type
casting/inference of
# any columns or the index.
df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
header=None, dtype=object)
if len(df.columns) < 2:
raise ValueError(
"Taxonomy format requires at least two columns, found %d."
% len(df.columns))
if has_header and not _has_expected_header(df):
raise ValueError(
"Taxonomy format requires a header with `Feature ID`
and `Taxon` "
"as the first two columns.")
if has_header or (has_header is None and _has_expected_header(df)):
# Make first row the header:
# https://stackoverflow.com/a/26147330/3776794
df.columns = df.iloc[0]
df.columns.name = None
df = df.reindex(df.index.drop(0))
else:
# No header
unnamed_columns = ['Unnamed Column %d' % (i + 1)
for i in range(len(df.columns[2:]))]
df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns
df.set_index(df.columns[0], drop=True, append=False, inplace=True)
if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row
of data.")
if df.index.has_duplicates:
raise ValueError(
"Taxonomy format feature IDs must be unique. The
following IDs "
"are duplicated: %s" % ',
'.join(df.index.get_duplicates()))
if df.columns.has_duplicates:
raise ValueError(
"Taxonomy format column names must be unique. The
following "
"column names are duplicated: %s" %
> ', '.join(df.columns.get_duplicates()))
E AttributeError: 'Index' object has no attribute 'get_duplicates'
q2_types/feature_data/_transformer.py:89: AttributeError
______________ TestTaxonomyFormatsToDataFrame.test_duplicate_ids _______________
self =
<q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame
testMethod=test_duplicate_ids>
def test_duplicate_ids(self):
with self.assertRaisesRegex(ValueError, 'duplicated: SEQUENCE1'):
_taxonomy_formats_to_dataframe(
self.get_data_path(os.path.join(
> 'taxonomy', 'duplicate-ids.tsv')))
q2_types/feature_data/tests/test_transformer.py:349:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def _taxonomy_formats_to_dataframe(filepath, has_header=None):
"""Read any of the three taxonomy formats into a dataframe.
Parameters
----------
filepath : str
The taxonomy-formatted file to be read.
has_header : bool, optional
If `None`, autodetect the header: only `Feature
ID<tab>Taxon` is
recognized, optionally followed by other columns. If
`True`, the file
must have the expected header described above otherwise an
error is
raised. If `False`, the file is read without assuming a header.
Returns
-------
pd.DataFrame
Dataframe containing parsed contents of the taxonomy file. The
dataframe will have its index name set to `Feature ID` and
its first
column will be `Taxon`, followed by any other columns in
the input
file.
"""
# Using `dtype=object` and `set_index()` to avoid type
casting/inference of
# any columns or the index.
df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
header=None, dtype=object)
if len(df.columns) < 2:
raise ValueError(
"Taxonomy format requires at least two columns, found %d."
% len(df.columns))
if has_header and not _has_expected_header(df):
raise ValueError(
"Taxonomy format requires a header with `Feature ID`
and `Taxon` "
"as the first two columns.")
if has_header or (has_header is None and _has_expected_header(df)):
# Make first row the header:
# https://stackoverflow.com/a/26147330/3776794
df.columns = df.iloc[0]
df.columns.name = None
df = df.reindex(df.index.drop(0))
else:
# No header
unnamed_columns = ['Unnamed Column %d' % (i + 1)
for i in range(len(df.columns[2:]))]
df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns
df.set_index(df.columns[0], drop=True, append=False, inplace=True)
if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row
of data.")
if df.index.has_duplicates:
raise ValueError(
"Taxonomy format feature IDs must be unique. The
following IDs "
> "are duplicated: %s" % ',
'.join(df.index.get_duplicates()))
E AttributeError: 'Index' object has no attribute 'get_duplicates'
q2_types/feature_data/_transformer.py:83: AttributeError
__________________ TestTaxonomyFormatsToDataFrame.test_jagged __________________
self =
<q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame
testMethod=test_jagged>
def test_jagged(self):
> with self.assertRaises(pandas.io.common.CParserError):
E AttributeError: module 'pandas.io.common' has no attribute 'CParserError'
q2_types/feature_data/tests/test_transformer.py:341: AttributeError
___________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_columns ____________
self =
<q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat
testMethod=test_duplicate_columns>
def test_duplicate_columns(self):
index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
columns = ['Taxon', 'Taxon']
df = pd.DataFrame([['abc', 'def'], ['ghi', 'jkl']], index=index,
columns=columns, dtype=object)
with self.assertRaisesRegex(ValueError, "duplicated: Taxon"):
> _dataframe_to_tsv_taxonomy_format(df)
q2_types/feature_data/tests/test_transformer.py:497:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def _dataframe_to_tsv_taxonomy_format(df):
if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row
of data.")
if len(df.columns) < 1:
raise ValueError(
"Taxonomy format requires at least one column of data.")
if df.index.name != 'Feature ID':
raise ValueError(
"Taxonomy format requires the dataframe index name to be "
"`Feature ID`, found %r" % df.index.name)
if df.columns[0] != 'Taxon':
raise ValueError(
"Taxonomy format requires the first column name to be
`Taxon`, "
"found %r" % df.columns[0])
if df.index.has_duplicates:
raise ValueError(
"Taxonomy format feature IDs must be unique. The
following IDs "
"are duplicated: %s" % ',
'.join(df.index.get_duplicates()))
if df.columns.has_duplicates:
raise ValueError(
"Taxonomy format column names must be unique. The
following "
"column names are duplicated: %s" %
> ', '.join(df.columns.get_duplicates()))
E AttributeError: 'Index' object has no attribute 'get_duplicates'
q2_types/feature_data/_transformer.py:126: AttributeError
_____________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_ids ______________
self =
<q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat
testMethod=test_duplicate_ids>
def test_duplicate_ids(self):
index = pd.Index(['seq1', 'seq2', 'seq1'], name='Feature ID',
dtype=object)
columns = ['Taxon']
df = pd.DataFrame([['abc'], ['def'], ['ghi']], index=index,
columns=columns, dtype=object)
with self.assertRaisesRegex(ValueError, "duplicated: seq1"):
> _dataframe_to_tsv_taxonomy_format(df)
q2_types/feature_data/tests/test_transformer.py:488:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def _dataframe_to_tsv_taxonomy_format(df):
if len(df.index) < 1:
raise ValueError("Taxonomy format requires at least one row
of data.")
if len(df.columns) < 1:
raise ValueError(
"Taxonomy format requires at least one column of data.")
if df.index.name != 'Feature ID':
raise ValueError(
"Taxonomy format requires the dataframe index name to be "
"`Feature ID`, found %r" % df.index.name)
if df.columns[0] != 'Taxon':
raise ValueError(
"Taxonomy format requires the first column name to be
`Taxon`, "
"found %r" % df.columns[0])
if df.index.has_duplicates:
raise ValueError(
"Taxonomy format feature IDs must be unique. The
following IDs "
> "are duplicated: %s" % ',
'.join(df.index.get_duplicates()))
E AttributeError: 'Index' object has no attribute 'get_duplicates'
q2_types/feature_data/_transformer.py:120: AttributeError
More information about the Debian-med-packaging
mailing list