[Debian-med-packaging] Bug#950932: q2-types: FTBFS with pandas 1.0: test failures

Sat Feb 8 14:18:11 GMT 2020

Source: q2-types
Version: 2019.10.0-1
Control: block 950430 by -1

With pandas 1.0 from experimental, the test suite fails as follows:

=================================== FAILURES ===================================
____________ TestTaxonomyFormatsToDataFrame.test_duplicate_columns _____________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_duplicate_columns>

     def test_duplicate_columns(self):
         with self.assertRaisesRegex(ValueError, 'duplicated: Column1'):
             _taxonomy_formats_to_dataframe(
                 self.get_data_path(os.path.join(
 >                   'taxonomy', 'duplicate-columns.tsv')))

q2_types/feature_data/tests/test_transformer.py:355:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

     def _taxonomy_formats_to_dataframe(filepath, has_header=None):
         """Read any of the three taxonomy formats into a dataframe.

         Parameters
         ----------
         filepath : str
             The taxonomy-formatted file to be read.
         has_header : bool, optional
             If `None`, autodetect the header: only `Feature ID<tab>Taxon` is
             recognized, optionally followed by other columns. If `True`, the file
             must have the expected header described above otherwise an error is
             raised. If `False`, the file is read without assuming a header.

         Returns
         -------
         pd.DataFrame
             Dataframe containing parsed contents of the taxonomy file. The
             dataframe will have its index name set to `Feature ID` and its first
             column will be `Taxon`, followed by any other columns in the input
             file.

         """
         # Using `dtype=object` and `set_index()` to avoid type casting/inference of
         # any columns or the index.
         df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
                          header=None, dtype=object)

         if len(df.columns) < 2:
             raise ValueError(
                 "Taxonomy format requires at least two columns, found %d."
                 % len(df.columns))

         if has_header and not _has_expected_header(df):
             raise ValueError(
                 "Taxonomy format requires a header with `Feature ID` and `Taxon` "
                 "as the first two columns.")

         if has_header or (has_header is None and _has_expected_header(df)):
             # Make first row the header:
             #     https://stackoverflow.com/a/26147330/3776794
             df.columns = df.iloc[0]
             df.columns.name = None
             df = df.reindex(df.index.drop(0))
         else:
             # No header
             unnamed_columns = ['Unnamed Column %d' % (i + 1)
                                for i in range(len(df.columns[2:]))]
             df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns

         df.set_index(df.columns[0], drop=True, append=False, inplace=True)

         if len(df.index) < 1:
             raise ValueError("Taxonomy format requires at least one row of data.")

         if df.index.has_duplicates:
             raise ValueError(
                 "Taxonomy format feature IDs must be unique. The following IDs "
                 "are duplicated: %s" % ', '.join(df.index.get_duplicates()))

         if df.columns.has_duplicates:
             raise ValueError(
                 "Taxonomy format column names must be unique. The following "
                 "column names are duplicated: %s" %
 >               ', '.join(df.columns.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:89: AttributeError
______________ TestTaxonomyFormatsToDataFrame.test_duplicate_ids _______________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_duplicate_ids>

     def test_duplicate_ids(self):
         with self.assertRaisesRegex(ValueError, 'duplicated: SEQUENCE1'):
             _taxonomy_formats_to_dataframe(
                 self.get_data_path(os.path.join(
 >                   'taxonomy', 'duplicate-ids.tsv')))

q2_types/feature_data/tests/test_transformer.py:349:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

     def _taxonomy_formats_to_dataframe(filepath, has_header=None):
         """Read any of the three taxonomy formats into a dataframe.

         Parameters
         ----------
         filepath : str
             The taxonomy-formatted file to be read.
         has_header : bool, optional
             If `None`, autodetect the header: only `Feature ID<tab>Taxon` is
             recognized, optionally followed by other columns. If `True`, the file
             must have the expected header described above otherwise an error is
             raised. If `False`, the file is read without assuming a header.

         Returns
         -------
         pd.DataFrame
             Dataframe containing parsed contents of the taxonomy file. The
             dataframe will have its index name set to `Feature ID` and its first
             column will be `Taxon`, followed by any other columns in the input
             file.

         """
         # Using `dtype=object` and `set_index()` to avoid type casting/inference of
         # any columns or the index.
         df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
                          header=None, dtype=object)

         if len(df.columns) < 2:
             raise ValueError(
                 "Taxonomy format requires at least two columns, found %d."
                 % len(df.columns))

         if has_header and not _has_expected_header(df):
             raise ValueError(
                 "Taxonomy format requires a header with `Feature ID` and `Taxon` "
                 "as the first two columns.")

         if has_header or (has_header is None and _has_expected_header(df)):
             # Make first row the header:
             #     https://stackoverflow.com/a/26147330/3776794
             df.columns = df.iloc[0]
             df.columns.name = None
             df = df.reindex(df.index.drop(0))
         else:
             # No header
             unnamed_columns = ['Unnamed Column %d' % (i + 1)
                                for i in range(len(df.columns[2:]))]
             df.columns = TSVTaxonomyFormat.HEADER + unnamed_columns

         df.set_index(df.columns[0], drop=True, append=False, inplace=True)

         if len(df.index) < 1:
             raise ValueError("Taxonomy format requires at least one row of data.")

         if df.index.has_duplicates:
             raise ValueError(
                 "Taxonomy format feature IDs must be unique. The following IDs "
 >               "are duplicated: %s" % ', '.join(df.index.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:83: AttributeError
__________________ TestTaxonomyFormatsToDataFrame.test_jagged __________________

self = <q2_types.feature_data.tests.test_transformer.TestTaxonomyFormatsToDataFrame testMethod=test_jagged>

     def test_jagged(self):
 >       with self.assertRaises(pandas.io.common.CParserError):
E       AttributeError: module 'pandas.io.common' has no attribute 'CParserError'

q2_types/feature_data/tests/test_transformer.py:341: AttributeError
___________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_columns ____________

self = <q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat testMethod=test_duplicate_columns>

     def test_duplicate_columns(self):
         index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
         columns = ['Taxon', 'Taxon']
         df = pd.DataFrame([['abc', 'def'], ['ghi', 'jkl']], index=index,
                           columns=columns, dtype=object)

         with self.assertRaisesRegex(ValueError, "duplicated: Taxon"):
 >           _dataframe_to_tsv_taxonomy_format(df)

q2_types/feature_data/tests/test_transformer.py:497:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

     def _dataframe_to_tsv_taxonomy_format(df):
         if len(df.index) < 1:
             raise ValueError("Taxonomy format requires at least one row of data.")

         if len(df.columns) < 1:
             raise ValueError(
                 "Taxonomy format requires at least one column of data.")

         if df.index.name != 'Feature ID':
             raise ValueError(
                 "Taxonomy format requires the dataframe index name to be "
                 "`Feature ID`, found %r" % df.index.name)

         if df.columns[0] != 'Taxon':
             raise ValueError(
                 "Taxonomy format requires the first column name to be `Taxon`, "
                 "found %r" % df.columns[0])

         if df.index.has_duplicates:
             raise ValueError(
                 "Taxonomy format feature IDs must be unique. The following IDs "
                 "are duplicated: %s" % ', '.join(df.index.get_duplicates()))

         if df.columns.has_duplicates:
             raise ValueError(
                 "Taxonomy format column names must be unique. The following "
                 "column names are duplicated: %s" %
 >               ', '.join(df.columns.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:126: AttributeError
_____________ TestDataFrameToTSVTaxonomyFormat.test_duplicate_ids ______________

self = <q2_types.feature_data.tests.test_transformer.TestDataFrameToTSVTaxonomyFormat testMethod=test_duplicate_ids>

     def test_duplicate_ids(self):
         index = pd.Index(['seq1', 'seq2', 'seq1'], name='Feature ID',
                          dtype=object)
         columns = ['Taxon']
         df = pd.DataFrame([['abc'], ['def'], ['ghi']], index=index,
                           columns=columns, dtype=object)

         with self.assertRaisesRegex(ValueError, "duplicated: seq1"):
 >           _dataframe_to_tsv_taxonomy_format(df)

q2_types/feature_data/tests/test_transformer.py:488:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _

     def _dataframe_to_tsv_taxonomy_format(df):
         if len(df.index) < 1:
             raise ValueError("Taxonomy format requires at least one row of data.")

         if len(df.columns) < 1:
             raise ValueError(
                 "Taxonomy format requires at least one column of data.")

         if df.index.name != 'Feature ID':
             raise ValueError(
                 "Taxonomy format requires the dataframe index name to be "
                 "`Feature ID`, found %r" % df.index.name)

         if df.columns[0] != 'Taxon':
             raise ValueError(
                 "Taxonomy format requires the first column name to be `Taxon`, "
                 "found %r" % df.columns[0])

         if df.index.has_duplicates:
             raise ValueError(
                 "Taxonomy format feature IDs must be unique. The following IDs "
 >               "are duplicated: %s" % ', '.join(df.index.get_duplicates()))
E           AttributeError: 'Index' object has no attribute 'get_duplicates'

q2_types/feature_data/_transformer.py:120: AttributeError