[med-svn] [Git][med-team/q2-feature-table][upstream] New upstream version 2024.2.0+dfsg

Andreas Tille (@tille) gitlab at salsa.debian.org
Sun Feb 18 13:34:53 GMT 2024



Andreas Tille pushed to branch upstream at Debian Med / q2-feature-table


Commits:
66136d9a by Andreas Tille at 2024-02-18T13:01:50+01:00
New upstream version 2024.2.0+dfsg
- - - - -


18 changed files:

- .github/workflows/ci-dev.yaml
- README.md
- q2_feature_table/__init__.py
- q2_feature_table/_examples.py
- q2_feature_table/_filter.py
- q2_feature_table/_subsample.py → q2_feature_table/_subsample_ids.py
- q2_feature_table/_summarize/__init__.py
- q2_feature_table/_summarize/_visualizer.py
- q2_feature_table/_summarize/summarize_assets/index.html
- q2_feature_table/_summarize/tabulate_seqs_assets/index.html
- q2_feature_table/_version.py
- q2_feature_table/plugin_setup.py
- q2_feature_table/tests/filter/test_conditional_filter.py
- q2_feature_table/tests/filter/test_filter_features.py
- q2_feature_table/tests/filter/test_filter_samples.py
- q2_feature_table/tests/filter/test_filter_sequences.py
- q2_feature_table/tests/test_subsample.py
- q2_feature_table/tests/test_summarize.py


Changes:

=====================================
.github/workflows/ci-dev.yaml
=====================================
@@ -9,4 +9,4 @@ jobs:
   ci:
     uses: qiime2/distributions/.github/workflows/lib-ci-dev.yaml@dev
     with:
-      distro: core
\ No newline at end of file
+      distro: amplicon


=====================================
README.md
=====================================
@@ -1,5 +1,5 @@
 # q2-feature-table
 
-![](https://github.com/qiime2/q2-feature-table/workflows/ci/badge.svg)
+![](https://github.com/qiime2/q2-feature-table/workflows/ci-dev/badge.svg)
 
 This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
\ No newline at end of file


=====================================
q2_feature_table/__init__.py
=====================================
@@ -7,9 +7,10 @@
 # ----------------------------------------------------------------------------
 
 from ._normalize import rarefy
-from ._subsample import subsample
+from ._subsample_ids import subsample_ids
 from ._transform import (presence_absence, relative_frequency, transpose)
-from ._summarize import (summarize, tabulate_seqs)
+from ._summarize import (summarize, tabulate_seqs, tabulate_sample_frequencies,
+                         tabulate_feature_frequencies, summarize_plus)
 from ._merge import (merge, merge_seqs, merge_taxa, overlap_methods)
 from ._filter import (filter_samples, filter_features, filter_seqs,
                       filter_features_conditionally)
@@ -27,5 +28,7 @@ __all__ = ['rarefy', 'presence_absence', 'relative_frequency', 'transpose',
            'summarize', 'merge', 'merge_seqs', 'filter_samples',
            'filter_features', 'merge_taxa', 'tabulate_seqs', 'overlap_methods',
            'core_features', 'group', 'heatmap', 'heatmap_choices',
-           'filter_seqs', 'subsample', 'rename_ids',
-           'filter_features_conditionally', 'split']
+           'filter_seqs', 'subsample_ids', 'rename_ids',
+           'filter_features_conditionally', 'split',
+           'tabulate_feature_frequencies', 'tabulate_sample_frequencies',
+           'summarize_plus']


=====================================
q2_feature_table/_examples.py
=====================================
@@ -388,3 +388,49 @@ def feature_table_tabulate_seqs_multi_taxon(use):
     )
 
     viz.assert_output_type('Visualization')
+
+
+def feature_table_tabulate_sample_freqs(use):
+    feature_table = use.init_artifact_from_url(
+        'feature_table', moving_pics_ft_url
+    )
+
+    sample_frequencies, = use.action(
+        use.UsageAction('feature_table', 'tabulate_sample_frequencies'),
+        use.UsageInputs(table=feature_table),
+        use.UsageOutputNames(sample_frequencies='sample_frequencies')
+    )
+
+    sample_frequencies.assert_output_type('ImmutableMetadata')
+
+
+def feature_table_tabulate_feature_freqs(use):
+    feature_table = use.init_artifact_from_url(
+        'feature_table', moving_pics_ft_url
+    )
+
+    feature_frequencies, = use.action(
+        use.UsageAction('feature_table', 'tabulate_feature_frequencies'),
+        use.UsageInputs(table=feature_table),
+        use.UsageOutputNames(feature_frequencies='feature_frequencies')
+    )
+
+    feature_frequencies.assert_output_type('ImmutableMetadata')
+
+
+def feature_table_summarize_plus(use):
+    feature_table = use.init_artifact_from_url(
+        'feature_table', moving_pics_ft_url
+    )
+
+    feature_freqs, sample_freqs, viz, = use.action(
+        use.UsageAction('feature_table', 'summarize_plus'),
+        use.UsageInputs(table=feature_table),
+        use.UsageOutputNames(feature_frequencies='feature_frequencies',
+                             sample_frequencies='sample_frequencies',
+                             summary='visual summary')
+    )
+
+    feature_freqs.assert_output_type('ImmutableMetadata')
+    sample_freqs.assert_output_type('ImmutableMetadata')
+    viz.assert_output_type('Visualization')


=====================================
q2_feature_table/_filter.py
=====================================
@@ -12,6 +12,14 @@ import numpy as np
 import pandas as pd
 
 
+def _validate_nonempty_table(table):
+    if table.is_empty():
+        raise ValueError("The resulting table is empty. This can happen if "
+                         "you filter all samples or features out of the "
+                         "table. Please check your filtering parameters and "
+                         "try again.")
+
+
 def _get_biom_filter_function(ids_to_keep, min_frequency, max_frequency,
                               min_nonzero, max_nonzero):
     ids_to_keep = set(ids_to_keep)
@@ -32,7 +40,8 @@ _other_axis_map = {'sample': 'observation', 'observation': 'sample'}
 
 def _filter_table(table, min_frequency, max_frequency, min_nonzero,
                   max_nonzero, metadata, where, axis, exclude_ids=False,
-                  filter_opposite_axis=True):
+                  filter_opposite_axis=True,
+                  allow_empty_table=False):
     if min_frequency == 0 and max_frequency is None and min_nonzero == 0 and\
        max_nonzero is None and metadata is None and where is None and\
        exclude_ids is False:
@@ -62,20 +71,25 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero,
             max_frequency=None, min_nonzero=1, max_nonzero=None)
         table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True)
 
+    if not allow_empty_table:
+        _validate_nonempty_table(table)
+
 
 def filter_samples(table: biom.Table, min_frequency: int = 0,
                    max_frequency: int = None, min_features: int = 0,
                    max_features: int = None,
                    metadata: qiime2.Metadata = None, where: str = None,
                    exclude_ids: bool = False,
-                   filter_empty_features: bool = True)\
+                   filter_empty_features: bool = True,
+                   allow_empty_table: bool = False)\
                   -> biom.Table:
     _filter_table(table=table, min_frequency=min_frequency,
                   max_frequency=max_frequency, min_nonzero=min_features,
                   max_nonzero=max_features, metadata=metadata,
                   where=where, axis='sample', exclude_ids=exclude_ids,
-                  filter_opposite_axis=filter_empty_features)
-
+                  filter_opposite_axis=filter_empty_features,
+                  allow_empty_table=allow_empty_table
+                  )
     return table
 
 
@@ -84,20 +98,23 @@ def filter_features(table: biom.Table, min_frequency: int = 0,
                     max_samples: int = None,
                     metadata: qiime2.Metadata = None, where: str = None,
                     exclude_ids: bool = False,
-                    filter_empty_samples: bool = True)\
+                    filter_empty_samples: bool = True,
+                    allow_empty_table: bool = False)\
                    -> biom.Table:
     _filter_table(table=table, min_frequency=min_frequency,
                   max_frequency=max_frequency, min_nonzero=min_samples,
                   max_nonzero=max_samples, metadata=metadata,
                   where=where, axis='observation', exclude_ids=exclude_ids,
-                  filter_opposite_axis=filter_empty_samples)
+                  filter_opposite_axis=filter_empty_samples,
+                  allow_empty_table=allow_empty_table)
 
     return table
 
 
 def filter_seqs(data: pd.Series, table: biom.Table = None,
                 metadata: qiime2.Metadata = None, where: str = None,
-                exclude_ids: bool = False) -> pd.Series:
+                exclude_ids: bool = False,
+                ) -> pd.Series:
     if table is not None and metadata is not None:
         raise ValueError('Filtering with metadata and filtering with a table '
                          'are mutually exclusive.')
@@ -116,12 +133,14 @@ def filter_seqs(data: pd.Series, table: biom.Table = None,
     filtered = data[data.index.isin(ids_to_keep)]
     if filtered.empty is True:
         raise ValueError('All features were filtered out of the data.')
+
     return filtered
 
 
 def filter_features_conditionally(table: biom.Table,
                                   abundance: float,
                                   prevalence: float,
+                                  allow_empty_table: bool = False
                                   ) -> biom.Table:
     """
     A function to perform joint filtering because it makes life better
@@ -143,4 +162,7 @@ def filter_features_conditionally(table: biom.Table,
 
     new_table = table.filter(filter_ids, axis='observation', inplace=False)
 
+    if not allow_empty_table:
+        _validate_nonempty_table(new_table)
+
     return new_table


=====================================
q2_feature_table/_subsample.py → q2_feature_table/_subsample_ids.py
=====================================
@@ -9,8 +9,8 @@
 import biom
 
 
-def subsample(table: biom.Table, subsampling_depth: int,
-              axis: str) -> biom.Table:
+def subsample_ids(table: biom.Table, subsampling_depth: int,
+                  axis: str) -> biom.Table:
     if axis == 'feature':
         # we are transposing the table due to biocore/biom-format#759
         table = table.transpose()


=====================================
q2_feature_table/_summarize/__init__.py
=====================================
@@ -6,6 +6,12 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
-from ._visualizer import (summarize, tabulate_seqs)
+from ._visualizer import (summarize, tabulate_seqs,
+                          tabulate_feature_frequencies,
+                          tabulate_sample_frequencies,
+                          summarize_plus)
 
-__all__ = ['summarize', 'tabulate_seqs']
+__all__ = ['summarize', 'tabulate_seqs',
+           'tabulate_feature_frequencies',
+           'tabulate_sample_frequencies',
+           'summarize_plus']


=====================================
q2_feature_table/_summarize/_visualizer.py
=====================================
@@ -59,7 +59,7 @@ def tabulate_seqs(output_dir: str, data: DNAIterator,
                 metadata_df.index)
         elif merge_method == 'strict':
             if set(metadata_df.index) != display_sequences:
-                raise Exception('Merge method is strict and IDs do not match')
+                raise ValueError('Merge method is strict and IDs do not match')
     if taxonomy is not None:
         for member in taxonomy.values():
             if merge_method == 'union':
@@ -69,8 +69,8 @@ def tabulate_seqs(output_dir: str, data: DNAIterator,
                     member.index)
             elif merge_method == 'strict':
                 if set(member.index) != display_sequences:
-                    raise Exception('Merge method is strict and IDs do not \
-                        match')
+                    raise ValueError(
+                                'Merge method is strict and IDs do not match')
 
     seq_len_stats = _compute_descriptive_stats(seq_lengths)
     _write_tsvs_of_descriptive_stats(seq_len_stats, output_dir)
@@ -99,6 +99,7 @@ def summarize(output_dir: str, table: biom.Table,
 
     sample_summary, sample_frequencies = _frequency_summary(
         table, axis='sample')
+
     if number_of_samples > 1:
 
         # Calculate the bin count, with a minimum of 5 bins
@@ -166,13 +167,11 @@ def summarize(output_dir: str, table: biom.Table,
 
     feature_qualitative_data = _compute_qualitative_summary(table)
     sample_frequencies.sort_values(inplace=True, ascending=False)
+
+    sample_frequencies_json = pd.Series(["{:,}".format(int(x)) for x in
+                                         sample_frequencies])
+
     feature_frequencies.sort_values(inplace=True, ascending=False)
-    sample_frequencies.to_csv(
-        os.path.join(output_dir, 'sample-frequency-detail.csv'),
-        header=False)
-    feature_frequencies.to_csv(
-        os.path.join(output_dir, 'feature-frequency-detail.csv'),
-        header=False)
 
     feature_frequencies = feature_frequencies.astype(int) \
         .apply('{:,}'.format).to_frame('Frequency')
@@ -196,7 +195,7 @@ def summarize(output_dir: str, table: biom.Table,
 
     # Create a JSON object containing the Sample Frequencies to build the
     # table in sample-frequency-detail.html
-    sample_frequencies_json = sample_frequencies.to_json()
+    sample_frequencies_json = sample_frequencies_json.to_json()
 
     templates = [index, sample_frequency_template, feature_frequency_template]
     context.update({'frequencies_list':
@@ -217,6 +216,55 @@ def summarize(output_dir: str, table: biom.Table,
     plt.close('all')
 
 
+def tabulate_feature_frequencies(table: biom.Table) -> qiime2.Metadata:
+    feature_frequencies = _frequencies(table, 'observation')
+    feature_frequencies = feature_frequencies.apply(
+        '{:,}'.format).to_frame('Frequency')
+    feature_qualitative_data = _compute_qualitative_summary(table)
+    samples_observed_in =\
+        pd.Series(feature_qualitative_data).astype(int).apply('{:,}'.format)
+    feature_frequencies["No. of Samples Observed In"] = samples_observed_in
+    feature_frequencies.index.name = "Feature ID"
+    return qiime2.Metadata(feature_frequencies)
+
+
+def tabulate_sample_frequencies(table: biom.Table) -> qiime2.Metadata:
+    sample_frequencies = _frequencies(table, 'sample')
+    sample_frequencies = sample_frequencies.apply(
+        '{:,}'.format).to_frame('Frequency')
+    sample_qualitative_data = _compute_qualitative_summary_sample(table)
+    samples_with_feature =\
+        pd.Series(sample_qualitative_data).astype(int).apply('{:,}'.format)
+    sample_frequencies["No. of Associated Features"] = samples_with_feature
+    sample_frequencies.index.name = "Sample ID"
+    return qiime2.Metadata(sample_frequencies)
+
+
+def summarize_plus(ctx, table, metadata=None):
+
+    try:
+        table_dimensions = table.view(pd.DataFrame).shape
+
+    except ValueError:
+        raise ValueError('Cannot summarize a table with no features')
+
+    if table_dimensions[0] == 0:
+        raise ValueError('Cannot summarize a table with no samples')
+
+    _feature_frequencies = ctx.get_action('feature_table',
+                                          'tabulate_feature_frequencies')
+    _sample_frequencies = ctx.get_action('feature_table',
+                                         'tabulate_sample_frequencies')
+    _visualizer = ctx.get_action('feature_table',
+                                 'summarize')
+
+    feature_frequencies, = _feature_frequencies(table)
+    sample_frequencies, = _sample_frequencies(table)
+    summary, = _visualizer(table, metadata)
+
+    return feature_frequencies, sample_frequencies, summary
+
+
 def _compute_descriptive_stats(lst: list):
     """Basic descriptive statistics and a (parametric) seven-number summary.
 
@@ -302,16 +350,32 @@ def _compute_qualitative_summary(table):
     return sample_count
 
 
+def _compute_qualitative_summary_sample(table):
+    feature_count = {}
+    for count_vector, sample_id, _ in table.iter():
+        feature_count[sample_id] = (count_vector != 0).sum()
+    return feature_count
+
+
 def _frequencies(table, axis):
     return pd.Series(data=table.sum(axis=axis), index=table.ids(axis=axis))
 
 
-def _frequency_summary(table, axis='sample'):
+def _frequency_summary(table: biom.Table, axis='sample'):
     frequencies = _frequencies(table, axis=axis)
 
-    summary = pd.Series([frequencies.min(), frequencies.quantile(0.25),
-                         frequencies.median(), frequencies.quantile(0.75),
-                         frequencies.max(), frequencies.mean()],
+    first = frequencies.quantile(0.25)
+    third = frequencies.quantile(0.75)
+    _fst = round(first, 1)
+    _min = round(frequencies.min(), 1)
+    _thd = round(third, 1)
+    _med = round(frequencies.median(), 1)
+    _max = round(frequencies.max(), 1)
+    mean = round(frequencies.mean(), 1)
+
+    summary = pd.Series([_min, _fst,
+                         _med, _thd,
+                         _max, mean],
                         index=['Minimum frequency', '1st quartile',
                                'Median frequency', '3rd quartile',
                                'Maximum frequency', 'Mean frequency'])


=====================================
q2_feature_table/_summarize/summarize_assets/index.html
=====================================
@@ -37,10 +37,6 @@
     <div class="row">
       <div class="col-lg-6">
         {{ sample_summary_table }}
-        <p class="text-left">
-          Frequency per sample detail (<a href="sample-frequency-detail.csv">csv</a> |
-          <a href="sample-frequency-detail.html">html</a>)
-        </p>
       </div>
       {% if number_of_samples > 1 %}
       <div class="col-lg-6">
@@ -59,10 +55,6 @@
       <div class="col-lg-6">
         <h1>Frequency per feature</h1>
         {{ feature_summary_table }}
-        <p class="text-left">
-          Frequency per feature detail (<a href="feature-frequency-detail.csv">csv</a> |
-          <a href="feature-frequency-detail.html">html</a>)
-        </p>
       </div>
       {% if number_of_features > 1 %}
       <div class="col-lg-6">


=====================================
q2_feature_table/_summarize/tabulate_seqs_assets/index.html
=====================================
@@ -108,7 +108,7 @@
           <tr>
             <td>{{ sequence }}</td>
             {% if sequence in data %}
-            <td><samp><a target="_blank" href="{{ sequence.url }}" rel="noopener noreferrer">{{ data[sequence].seq }}</a></samp></td>
+            <td><samp><a target="_blank" href="{{ data[sequence].url }}" rel="noopener noreferrer">{{ data[sequence].seq }}</a></samp></td>
             <td>{{ data[sequence].len }}</td>
             {% else %}
             <td>-</td>
@@ -116,7 +116,7 @@
             {% endif %}
             {% if taxonomy is defined %}
               {% for member in taxonomy.values() %}
-                {% if sequence in member.index %}            
+                {% if sequence in member.index %}
             <td>{{ member.loc[sequence, "Taxon"] }}</td>
                 {% else %}
             <td>-</td>


=====================================
q2_feature_table/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 2023.9.0, Release-2023.9)"
-    git_full = "7cac3100e1bdc58f3d4c0b3790a3aa3973de4696"
-    git_date = "2023-10-03 21:58:03 +0000"
+    git_refnames = " (tag: 2024.2.0, Release-2024.2)"
+    git_full = "8635d787a83854309dc6cb26afdc5dc5476db208"
+    git_date = "2024-02-16 21:57:35 +0000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 


=====================================
q2_feature_table/plugin_setup.py
=====================================
@@ -8,12 +8,14 @@
 
 from qiime2.plugin import (Plugin, Int, Float, Range, Metadata, Str, Bool,
                            Choices, MetadataColumn, Categorical, List,
-                           Citations, TypeMatch, TypeMap, Collection)
+                           Citations, TypeMatch, TypeMap, Collection,
+                           Visualization)
 
 from q2_types.feature_table import (
     FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Composition)
 from q2_types.feature_data import (
     FeatureData, Sequence, Taxonomy, AlignedSequence)
+from q2_types.metadata import ImmutableMetadata
 
 import q2_feature_table
 import q2_feature_table._examples as ex
@@ -56,7 +58,7 @@ plugin.methods.register_function(
 )
 
 plugin.methods.register_function(
-    function=q2_feature_table.subsample,
+    function=q2_feature_table.subsample_ids,
     inputs={'table': FeatureTable[Frequency]},
     parameters={'subsampling_depth': Int % Range(1, None),
                 'axis': Str % Choices(['sample', 'feature'])},
@@ -311,7 +313,8 @@ plugin.methods.register_function(
                 'metadata': Metadata,
                 'where': Str,
                 'exclude_ids': Bool,
-                'filter_empty_features': Bool},
+                'filter_empty_features': Bool,
+                'allow_empty_table': Bool},
     outputs=[('filtered_table', FeatureTable[T1])],
     input_descriptions={
         'table': 'The feature table from which samples should be filtered.'
@@ -341,6 +344,9 @@ plugin.methods.register_function(
                        'table instead of being retained.',
         'filter_empty_features': 'If true, features which are not present in '
                                  'any retained samples are dropped.',
+        'allow_empty_table': 'If true, the filtered table may be empty. '
+                             'Default behavior is to raise an error if the '
+                             'filtered table is empty.'
     },
     output_descriptions={
         'filtered_table': 'The resulting feature table filtered by sample.'
@@ -366,8 +372,8 @@ plugin.methods.register_function(
     function=q2_feature_table.filter_features_conditionally,
     inputs={'table': FeatureTable[T1]},
     parameters={'prevalence': Float % Range(0, 1),
-                'abundance': Float % Range(0, 1)
-                },
+                'abundance': Float % Range(0, 1),
+                'allow_empty_table': Bool},
     outputs=[('filtered_table', FeatureTable[T1])],
     input_descriptions={
         'table': 'The feature table from which features should be filtered.'
@@ -377,7 +383,10 @@ plugin.methods.register_function(
                       'retained.'),
         'prevalence': ('The minimum portion of samples that a feature '
                        'must have a relative abundance of at least '
-                       '`abundance` to be retained.')
+                       '`abundance` to be retained.'),
+        'allow_empty_table': 'If true, the filtered table may be empty. '
+                             'Default behavior is to raise an error if the '
+                             'filtered table is empty.'
     },
     output_descriptions={
         'filtered_table': 'The resulting feature table filtered by feature.'
@@ -406,7 +415,8 @@ plugin.methods.register_function(
                 'metadata': Metadata,
                 'where': Str,
                 'exclude_ids': Bool,
-                'filter_empty_samples': Bool},
+                'filter_empty_samples': Bool,
+                'allow_empty_table': Bool},
     outputs=[('filtered_table', FeatureTable[Frequency])],
     input_descriptions={
         'table': 'The feature table from which features should be filtered.'
@@ -436,6 +446,9 @@ plugin.methods.register_function(
                        'table instead of being retained.',
         'filter_empty_samples': 'If true, drop any samples where none of the '
                                 'retained features are present.',
+        'allow_empty_table': 'If true, the filtered table may be empty. '
+                                'Default behavior is to raise an error if the '
+                                'filtered table is empty.'
     },
     output_descriptions={
         'filtered_table': 'The resulting feature table filtered by feature.'
@@ -498,8 +511,7 @@ plugin.methods.register_function(
 
 plugin.visualizers.register_function(
     function=q2_feature_table.summarize,
-    inputs={'table': FeatureTable[Frequency | RelativeFrequency |
-                                  PresenceAbsence]},
+    inputs={'table': FeatureTable[Frequency | PresenceAbsence]},
     parameters={'sample_metadata': Metadata},
     input_descriptions={'table': 'The feature table to be summarized.'},
     parameter_descriptions={'sample_metadata': 'The sample metadata.'},
@@ -652,3 +664,62 @@ plugin.methods.register_function(
                 'splits are defined by values in metadata column.',
     examples={}
 )
+
+plugin.methods.register_function(
+    function=q2_feature_table.tabulate_feature_frequencies,
+    inputs={'table': FeatureTable[Frequency | PresenceAbsence |
+                                  RelativeFrequency]},
+    parameters={},
+    outputs={'feature_frequencies': ImmutableMetadata},
+    input_descriptions={
+        'table': 'The input feature table.'
+    },
+    output_descriptions={
+        'feature_frequencies': 'Per-sample and total frequencies per feature.'
+    },
+    name='Tabulate feature frequencies',
+    description='Tabulate sample count and total frequency per feature.',
+    examples={'feature_table_tabulate_feature_frequencies':
+              ex.feature_table_tabulate_feature_freqs}
+)
+
+plugin.methods.register_function(
+    function=q2_feature_table.tabulate_sample_frequencies,
+    inputs={'table': FeatureTable[Frequency | PresenceAbsence |
+                                  RelativeFrequency]},
+    parameters={},
+    outputs={'sample_frequencies': ImmutableMetadata},
+    input_descriptions={
+        'table': 'The input feature table.'
+    },
+    output_descriptions={
+        'sample_frequencies': 'Observed feature count and total' +
+        ' frequencies per sample.'
+    },
+    name='Tabulate sample frequencies',
+    description='Tabulate feature count and total frequency per sample.',
+    examples={'feature_table_tabulate_sample_frequencies':
+              ex.feature_table_tabulate_sample_freqs}
+)
+
+plugin.pipelines.register_function(
+    function=q2_feature_table.summarize_plus,
+    inputs={'table': FeatureTable[Frequency | PresenceAbsence]},
+    parameters={'metadata': Metadata},
+    outputs={'feature_frequencies': ImmutableMetadata,
+             'sample_frequencies': ImmutableMetadata,
+             'summary': Visualization},
+    input_descriptions={
+        'table': 'The feature table to be summarized.'
+    },
+    parameter_descriptions={'metadata': 'The sample metadata.'},
+    output_descriptions={'feature_frequencies': 'Per-sample and total ' +
+                         'frequencies per feature.',
+                         'sample_frequencies': 'Observed feature count and ' +
+                         'total frequencies per sample.',
+                         'summary': 'Visual summary of feature table'},
+    name="Summarize table plus",
+    description="Generate visual and tabular summaries of a feature table. "
+                "Tabulate sample and feature frequencies.",
+    examples={'feature_table_summarize_plus': ex.feature_table_summarize_plus}
+)


=====================================
q2_feature_table/tests/filter/test_conditional_filter.py
=====================================
@@ -40,6 +40,36 @@ class TestConditional(unittest.TestCase):
         npt.assert_array_equal(known.ids(axis='observation'),
                                test_.ids(axis='observation'))
 
+    def test_allow_empty_table_true(self):
+        table = biom.Table(
+            data=np.array([[0,   0,  10,   0,   0],
+                           [250, 250, 140,  90, 150],
+                           [250,  25, 100, 200, 100],
+                           [0, 225, 250, 210, 250]]),
+            sample_ids=['A', 'B', 'C', 'D', 'E'],
+            observation_ids=['bat', 'cat', 'rat', 'a-tat-tat']
+            )
+        test_ = filter_features_conditionally(table,
+                                              prevalence=0.9,
+                                              abundance=0.9,
+                                              allow_empty_table=True)
+        self.assertTrue(test_.is_empty())
+
+    def test_allow_empty_table_false(self):
+        table = biom.Table(
+            data=np.array([[0,   0,  10,   0,   0],
+                           [250, 250, 140,  90, 150],
+                           [250,  25, 100, 200, 100],
+                           [0, 225, 250, 210, 250]]),
+            sample_ids=['A', 'B', 'C', 'D', 'E'],
+            observation_ids=['bat', 'cat', 'rat', 'a-tat-tat']
+            )
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_features_conditionally(table,
+                                          prevalence=0.9,
+                                          abundance=0.9,
+                                          allow_empty_table=False)
+
 
 if __name__ == "__main__":
     unittest.main()


=====================================
q2_feature_table/tests/filter/test_filter_features.py
=====================================
@@ -45,11 +45,16 @@ class FilterFeaturesTests(unittest.TestCase):
                          ['S1', 'S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 1], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_features(table, min_frequency=5)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_features(table, min_frequency=5)
+
+        # filter all and allow empty table
+        actual = filter_features(table, min_frequency=5,
+                                 allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_filter_empty_samples(self):
@@ -64,12 +69,17 @@ class FilterFeaturesTests(unittest.TestCase):
                          ['S1', 'S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 1], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_features(table, min_frequency=5, filter_empty_samples=False)
+
+        # filter all and allow empty table
         actual = filter_features(table, min_frequency=5,
-                                 filter_empty_samples=False)
+                                 filter_empty_samples=False,
+                                 allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_feature_metadata(self):
@@ -99,13 +109,18 @@ class FilterFeaturesTests(unittest.TestCase):
                          ['S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all and raise ValueError
         df = pd.DataFrame({}, index=pd.Index(['foo'], name='id'))
         metadata = qiime2.Metadata(df)
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_features(table, metadata=metadata)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_features(table, metadata=metadata)
+
+        # filter all and allow empty table
+        actual = filter_features(table, metadata=metadata,
+                                 allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
         # exclude one
@@ -122,15 +137,20 @@ class FilterFeaturesTests(unittest.TestCase):
                          ['S1', 'S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # exclude all
+        # exclude all and raise ValueError
         df = pd.DataFrame({'SequencedGenome': ['yes', 'yes']},
                           index=pd.Index(['O1', 'O2'], name='id'))
         metadata = qiime2.Metadata(df)
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_features(table, metadata=metadata,
-                                 exclude_ids=True)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_features(table, metadata=metadata, exclude_ids=True,
+                            allow_empty_table=False)
+
+        # exclude all and allow empty table
+        actual = filter_features(table, metadata=metadata, exclude_ids=True,
+                                 allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_where(self):
@@ -162,7 +182,7 @@ class FilterFeaturesTests(unittest.TestCase):
                          ['S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all with ValueError
         df = pd.DataFrame({'SequencedGenome': ['yes', 'no']},
                           index=pd.Index(['O1', 'O2'], name='feature-id'))
         metadata = qiime2.Metadata(df)
@@ -170,7 +190,14 @@ class FilterFeaturesTests(unittest.TestCase):
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
         where = "SequencedGenome='yes' AND SequencedGenome='no'"
-        actual = filter_features(table, metadata=metadata, where=where)
+
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_features(table, metadata=metadata, where=where,
+                            allow_empty_table=False)
+
+        # Filter all and allow empty table
+        actual = filter_features(table, metadata=metadata, where=where,
+                                 allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
         # filter one -> exclude one


=====================================
q2_feature_table/tests/filter/test_filter_samples.py
=====================================
@@ -64,11 +64,18 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_samples(table, min_frequency=42)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, min_frequency=42)
+
+        # filter all and allow empty table
+        table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+                      ['S1', 'S2', 'S3'])
+        actual = filter_samples(table, min_frequency=42,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_max_frequency(self):
@@ -102,11 +109,17 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S1'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_samples(table, max_frequency=0)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, max_frequency=0)
+
+        # filter all and allow empty table
+        table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+                      ['S1', 'S2', 'S3'])
+        actual = filter_samples(table, max_frequency=0, allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_filter_empty_features(self):
@@ -143,12 +156,20 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S1'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, max_frequency=0,
+                           filter_empty_features=False)
+
+        # filter all and allow empty table
+        table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+                      ['S1', 'S2', 'S3'])
         actual = filter_samples(table, max_frequency=0,
-                                filter_empty_features=False)
+                                filter_empty_features=False,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_min_features(self):
@@ -172,11 +193,17 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_samples(table, min_features=3)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, min_features=3)
+
+        # filter all and allow empty table
+        table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+                      ['S1', 'S2', 'S3'])
+        actual = filter_samples(table, min_features=3, allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_max_features(self):
@@ -200,11 +227,18 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S1'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_samples(table, max_features=0)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, max_features=0)
+
+        # filter all and allow empty table
+        table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
+                      ['O1-alt', 'O2-alt'],
+                      ['S1', 'S2', 'S3'])
+        actual = filter_samples(table, max_features=0, allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_sample_metadata(self):
@@ -236,13 +270,20 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S2', 'S3'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         df = pd.DataFrame({}, index=pd.Index(['foo'], name='id'))
         metadata = qiime2.Metadata(df)
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
-        actual = filter_samples(table, metadata=metadata)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, metadata=metadata)
+
+        # filter all and allow empty table
+        df = pd.DataFrame({}, index=pd.Index(['foo'], name='id'))
+        metadata = qiime2.Metadata(df)
+        actual = filter_samples(table, metadata=metadata,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
         # exclude none
@@ -284,13 +325,23 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S3'])
         self.assertEqual(actual, expected)
 
-        # exclude all
+        # exclude all raising ValueError
+        df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
+                           'SampleType': ['gut', 'tongue', 'gut']},
+                          index=pd.Index(['S1', 'S2', 'S3'], name='id'))
+        metadata = qiime2.Metadata(df)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, metadata=metadata,
+                           exclude_ids=True)
+
+        # exclude all and allow empty table
         df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
                            'SampleType': ['gut', 'tongue', 'gut']},
                           index=pd.Index(['S1', 'S2', 'S3'], name='id'))
         metadata = qiime2.Metadata(df)
         actual = filter_samples(table, metadata=metadata,
-                                exclude_ids=True)
+                                exclude_ids=True,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_sample_metadata_extra_ids(self):
@@ -354,7 +405,7 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S1'])
         self.assertEqual(actual, expected)
 
-        # filter all
+        # filter all raising ValueError
         df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
                            'SampleType': ['gut', 'tongue', 'gut']},
                           index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
@@ -363,7 +414,12 @@ class FilterSamplesTests(unittest.TestCase):
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
         where = "Subject='subject-1' AND Subject='subject-2'"
-        actual = filter_samples(table, metadata=metadata, where=where)
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table, metadata=metadata, where=where)
+
+        # filter all allowing empty table
+        actual = filter_samples(table, metadata=metadata, where=where,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
         # filter none -> exclude none
@@ -417,7 +473,22 @@ class FilterSamplesTests(unittest.TestCase):
                          ['S3'])
         self.assertEqual(actual, expected)
 
-        # filter all -> exclude all
+        # filter all -> exclude all raising ValueError
+        df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
+                           'SampleType': ['gut', 'tongue', 'gut']},
+                          index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
+        metadata = qiime2.Metadata(df)
+        table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
+                      ['O1', 'O2'],
+                      ['S1', 'S2', 'S3'])
+        where = "Subject='subject-1' OR Subject='subject-2'"
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table,
+                           metadata=metadata,
+                           where=where,
+                           exclude_ids=True)
+
+        # exclude all and allow empty table
         df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
                            'SampleType': ['gut', 'tongue', 'gut']},
                           index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
@@ -429,7 +500,8 @@ class FilterSamplesTests(unittest.TestCase):
         actual = filter_samples(table,
                                 metadata=metadata,
                                 where=where,
-                                exclude_ids=True)
+                                exclude_ids=True,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_combine_id_and_frequency_filters(self):
@@ -618,7 +690,7 @@ class FilterSamplesTests(unittest.TestCase):
         self.assertEqual(actual, expected)
 
         # exclude one, min_frequency filter one,
-        # max_frequency filter one
+        # max_frequency filter one raising ValueError
         df = pd.DataFrame({'Subject': ['subject-1'],
                            'SampleType': ['gut']},
                           index=pd.Index(['S1'], name='id'))
@@ -626,16 +698,25 @@ class FilterSamplesTests(unittest.TestCase):
         table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table,
+                           metadata=metadata,
+                           exclude_ids=True,
+                           max_frequency=4,
+                           min_frequency=3)
+
+        # allow empty table
         actual = filter_samples(table,
                                 metadata=metadata,
                                 exclude_ids=True,
                                 max_frequency=4,
-                                min_frequency=3)
+                                min_frequency=3,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
         # where filter one -> exclude one,
         # min_frequency filter one,
-        # max_frequency filter one
+        # max_frequency filter one raising ValueError
         df = pd.DataFrame({'Subject': ['subject-1', 'subject-2'],
                            'SampleType': ['gut', 'tongue']},
                           index=pd.Index(['S1', 'S2'], name='id'))
@@ -644,12 +725,22 @@ class FilterSamplesTests(unittest.TestCase):
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
         where = "Subject='subject-1'"
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table,
+                           metadata=metadata,
+                           exclude_ids=True,
+                           where=where,
+                           max_frequency=4,
+                           min_frequency=3)
+
+        # allow empty table
         actual = filter_samples(table,
                                 metadata=metadata,
                                 exclude_ids=True,
                                 where=where,
                                 max_frequency=4,
-                                min_frequency=3)
+                                min_frequency=3,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
     def test_combine_exclude_ids_and_features_filters(self):
@@ -773,7 +864,7 @@ class FilterSamplesTests(unittest.TestCase):
         self.assertEqual(actual, expected)
 
         # exclude one, max_features filter one,
-        # min_features filter one
+        # min_features filter one raising ValueError
         df = pd.DataFrame({'Subject': ['subject-1'],
                            'SampleType': ['gut']},
                           index=pd.Index(['S2'], name='id'))
@@ -781,16 +872,23 @@ class FilterSamplesTests(unittest.TestCase):
         table = Table(np.array([[0, 1, 3], [0, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table,
+                           metadata=metadata,
+                           exclude_ids=True,
+                           min_features=1,
+                           max_features=1)
+        # allow empty table: the same filters must now yield an empty table
         actual = filter_samples(table,
                                 metadata=metadata,
                                 exclude_ids=True,
                                 min_features=1,
-                                max_features=1)
+                                max_features=1, allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
         # where filter one -> exclude one,
         # max_features filter one,
-        # min_features filter one
+        # min_features filter one raising ValueError
         df = pd.DataFrame({'Subject': ['subject-1', 'subject-2'],
                            'SampleType': ['gut', 'tongue']},
                           index=pd.Index(['S1', 'S2'], name='id'))
@@ -799,12 +897,22 @@ class FilterSamplesTests(unittest.TestCase):
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
         where = "SampleType='tongue'"
+        with self.assertRaisesRegex(ValueError, 'table is empty'):
+            filter_samples(table,
+                           metadata=metadata,
+                           where=where,
+                           exclude_ids=True,
+                           min_features=1,
+                           max_features=1)
+
+        # allow empty table
         actual = filter_samples(table,
                                 metadata=metadata,
                                 where=where,
                                 exclude_ids=True,
                                 min_features=1,
-                                max_features=1)
+                                max_features=1,
+                                allow_empty_table=True)
         self.assertTrue(actual.is_empty())
 
 


=====================================
q2_feature_table/tests/filter/test_filter_sequences.py
=====================================
@@ -31,8 +31,8 @@ class FilterSeqsTests(unittest.TestCase):
         md_full.index.name = 'FeatureID'
         self.md_full = qiime2.Metadata(md_full)
 
-    def filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
-                               where=None):
+    def _filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
+                                where=None):
         if md is None:
             md = self.md_full
         obs = filter_seqs(self.seqs, metadata=md,
@@ -41,13 +41,13 @@ class FilterSeqsTests(unittest.TestCase):
 
     def test_id_based_filtering(self):
         # filter none
-        self.filter_and_assertEqual(self.seqs,
-                                    md=qiime2.Metadata(self.df_lite))
+        self._filter_and_assertEqual(self.seqs,
+                                     md=qiime2.Metadata(self.df_lite))
 
         # filter one
         md = qiime2.Metadata(self.df_lite.drop(['O1']))
         exp = pd.Series(['GCTA', 'CCCC', 'TGTT'], index=['O2', 'O3', 'O4'])
-        self.filter_and_assertEqual(exp, md=md)
+        self._filter_and_assertEqual(exp, md=md)
 
         # filter all
         md = qiime2.Metadata(pd.DataFrame({},
@@ -58,12 +58,12 @@ class FilterSeqsTests(unittest.TestCase):
         # exclude none
         md = qiime2.Metadata(pd.DataFrame({},
                                           index=pd.Index(['foo'], name='id')))
-        self.filter_and_assertEqual(self.seqs, md=md, exclude_ids=True)
+        self._filter_and_assertEqual(self.seqs, md=md, exclude_ids=True)
 
         # exclude one
         md = qiime2.Metadata(self.df_lite.drop(['O1', 'O2', 'O3']))
         exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3'])
-        self.filter_and_assertEqual(exp, md=md, exclude_ids=True)
+        self._filter_and_assertEqual(exp, md=md, exclude_ids=True)
 
         # exclude all
         md = qiime2.Metadata(self.df_lite)
@@ -74,17 +74,17 @@ class FilterSeqsTests(unittest.TestCase):
         md = qiime2.Metadata(pd.DataFrame([],
                              index=pd.Index(['O1', 'O3', 'foo'], name='id')))
         exp = pd.Series(['ACGT', 'CCCC'], index=['O1', 'O3'])
-        self.filter_and_assertEqual(exp, md=md)
+        self._filter_and_assertEqual(exp, md=md)
 
     def test_where_param(self):
         # filter none
         where = "stuff='foo' OR stuff='bar' OR stuff='baz'"
-        self.filter_and_assertEqual(self.seqs, where=where)
+        self._filter_and_assertEqual(self.seqs, where=where)
 
         # filter one
         where = "stuff='foo' OR stuff='bar'"
         exp = pd.Series(['ACGT', 'GCTA', 'TGTT'], index=['O1', 'O2', 'O4'])
-        self.filter_and_assertEqual(exp, where=where)
+        self._filter_and_assertEqual(exp, where=where)
 
         # filter all
         where = "stuff='boo'"
@@ -93,12 +93,12 @@ class FilterSeqsTests(unittest.TestCase):
 
         # exclude none
         where = 'CAST(some_numbers AS INTEGER) < 0'
-        self.filter_and_assertEqual(self.seqs, exclude_ids=True, where=where)
+        self._filter_and_assertEqual(self.seqs, exclude_ids=True, where=where)
 
         # exclude one
         where = 'CAST(some_numbers AS INTEGER) > 3'
         exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3'])
-        self.filter_and_assertEqual(exp, exclude_ids=True, where=where)
+        self._filter_and_assertEqual(exp, exclude_ids=True, where=where)
 
         # exclude all
         where = 'CAST(some_numbers AS INTEGER) BETWEEN 0 AND 5'


=====================================
q2_feature_table/tests/test_subsample.py
=====================================
@@ -12,16 +12,16 @@ import numpy as np
 import numpy.testing as npt
 from biom.table import Table
 
-from q2_feature_table import subsample
+from q2_feature_table import subsample_ids
 
 
-class SubsampleTests(TestCase):
+class SubsampleIDsTests(TestCase):
 
     def test_subsample_samples(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
-        a = subsample(t, 2, 'sample')
+        a = subsample_ids(t, 2, 'sample')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='sample'))
@@ -38,7 +38,7 @@ class SubsampleTests(TestCase):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
-        a = subsample(t, 2, 'feature')
+        a = subsample_ids(t, 2, 'feature')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='observation'))
@@ -56,28 +56,28 @@ class SubsampleTests(TestCase):
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample(t, 10, 'sample')
+            subsample_ids(t, 10, 'sample')
 
     def test_subsample_features_oversample(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample(t, 10, 'feature')
+            subsample_ids(t, 10, 'feature')
 
     def test_subsample_samples_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample(t, 2, 'sample')
+            subsample_ids(t, 2, 'sample')
 
     def test_subsample_features_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample(t, 2, 'feature')
+            subsample_ids(t, 2, 'feature')
 
 
 if __name__ == "__main__":


=====================================
q2_feature_table/tests/test_summarize.py
=====================================
@@ -16,9 +16,13 @@ import pandas as pd
 import numpy as np
 import qiime2
 from q2_types.feature_data import DNAIterator
+from qiime2.plugin.testing import TestPluginBase
+from qiime2 import Artifact, Metadata
 import csv
 
-from q2_feature_table import tabulate_seqs, summarize
+from q2_feature_table import (
+        tabulate_seqs, summarize,
+        tabulate_feature_frequencies, tabulate_sample_frequencies)
 from q2_feature_table._summarize._visualizer import _compute_descriptive_stats
 from q2_feature_table._summarize._visualizer import _frequencies
 from q2_feature_table._summarize._vega_spec import vega_spec
@@ -379,16 +383,6 @@ class SummarizeTests(TestCase):
             index_fp = os.path.join(output_dir, 'index.html')
             self.assertTrue(os.path.exists(index_fp))
 
-            feature_freq_fp = os.path.join(output_dir,
-                                           'feature-frequency-detail.csv')
-            self.assertTrue(os.path.exists(feature_freq_fp))
-            self.assertTrue('O1,4' in open(feature_freq_fp).read())
-
-            sample_freq_fp = os.path.join(output_dir,
-                                          'sample-frequency-detail.csv')
-            self.assertTrue(os.path.exists(sample_freq_fp))
-            self.assertTrue('S1,1453' in open(sample_freq_fp).read())
-
     def test_frequency_ranges_are_zero(self):
         table = biom.Table(np.array([[25, 25, 25], [25, 25, 25]]),
                            ['O1', 'O2'],
@@ -400,16 +394,6 @@ class SummarizeTests(TestCase):
             index_fp = os.path.join(output_dir, 'index.html')
             self.assertTrue(os.path.exists(index_fp))
 
-            feature_freq_fp = os.path.join(output_dir,
-                                           'feature-frequency-detail.csv')
-            self.assertTrue(os.path.exists(feature_freq_fp))
-            self.assertTrue('O1,75' in open(feature_freq_fp).read())
-
-            sample_freq_fp = os.path.join(output_dir,
-                                          'sample-frequency-detail.csv')
-            self.assertTrue(os.path.exists(sample_freq_fp))
-            self.assertTrue('S1,50' in open(sample_freq_fp).read())
-
     def test_one_sample(self):
         sample_frequencies_pdf_fn = 'sample-frequencies.pdf'
         # sample-frequencies.pdf should not be written when there is only
@@ -471,16 +455,6 @@ class SummarizeTests(TestCase):
             index_fp = os.path.join(output_dir, 'index.html')
             self.assertTrue(os.path.exists(index_fp))
 
-            feature_freq_fp = os.path.join(output_dir,
-                                           'feature-frequency-detail.csv')
-            self.assertTrue(os.path.exists(feature_freq_fp))
-            self.assertTrue('O1,4' in open(feature_freq_fp).read())
-
-            sample_freq_fp = os.path.join(output_dir,
-                                          'sample-frequency-detail.csv')
-            self.assertTrue(os.path.exists(sample_freq_fp))
-            self.assertTrue('S1,1' in open(sample_freq_fp).read())
-
     def test_vega_spec_data(self):
         # test if metadata is converted correctly to vega compatible JSON
         df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
@@ -516,5 +490,133 @@ class SummarizeTests(TestCase):
         self.assertEqual(spec['data'][0]['values'], exp)
 
 
+class TabulateSampleFrequencyTests(TestCase):
+    """Checks the per-sample table built by tabulate_sample_frequencies."""
+
+    def test_basic_case(self):
+        # S1 lacks O1, so it has one associated feature and a total of 25.
+        counts = np.array([[0, 25, 25], [25, 25, 25]])
+        sample_ids = ['S1', 'S2', 'S3']
+        table = biom.Table(counts, ['O1', 'O2'], sample_ids)
+        observed = tabulate_sample_frequencies(table).to_dataframe()
+        columns = {'Frequency': ['25.0', '50.0', '50.0'],
+                   'No. of Associated Features': ['1', '2', '2']}
+        expected = pd.DataFrame(columns, index=sample_ids)
+        expected.index.name = 'Sample ID'
+        pd.testing.assert_frame_equal(expected, observed)
+
+
+class TabulateFeatureFrequencyTests(TestCase):
+    """Checks the per-feature table from tabulate_feature_frequencies."""
+
+    def test_basic_case(self):
+        # O1 is absent from S3, so it is seen in two samples and totals 50.
+        counts = np.array([[25, 25, 0], [25, 25, 25]])
+        feature_ids = ['O1', 'O2']
+        table = biom.Table(counts, feature_ids, ['S1', 'S2', 'S3'])
+        observed = tabulate_feature_frequencies(table).to_dataframe()
+        columns = {'Frequency': ['50.0', '75.0'],
+                   'No. of Samples Observed In': ['2', '3']}
+        expected = pd.DataFrame(columns, index=feature_ids)
+        expected.index.name = 'Feature ID'
+        pd.testing.assert_frame_equal(expected, observed)
+
+
+class SummarizePlusTests(TestPluginBase):
+    """Runs the ``summarize_plus`` pipeline looked up from the plugin."""
+
+    package = 'q2_feature_table'
+
+    def setUp(self):
+        super().setUp()
+        self.summarize_plus = self.plugin.pipelines['summarize_plus']
+
+    def test_basic(self):
+        # O1 is missing from S2, which shows up in both expected tables.
+        table = biom.Table(np.array([[25, 0, 25], [25, 25, 25]]),
+                           ['O1', 'O2'],
+                           ['S1', 'S2', 'S3'])
+        table = Artifact.import_data('FeatureTable[Frequency]', table)
+        results = self.summarize_plus(table)
+
+        self.assertEqual(len(results), 3)
+        self.assertEqual(repr(results.feature_frequencies.type),
+                         'ImmutableMetadata')
+        self.assertEqual(repr(results.sample_frequencies.type),
+                         'ImmutableMetadata')
+        self.assertEqual(repr(results.summary.type),
+                         'Visualization')
+
+        exp_feature = pd.DataFrame({'Frequency': ['50.0', '75.0'],
+                                    'No. of Samples Observed In': ['2', '3']},
+                                   index=['O1', 'O2'])
+        exp_feature.index.name = "Feature ID"
+        obs_feature = results[0].view(Metadata).to_dataframe()
+        pd.testing.assert_frame_equal(exp_feature, obs_feature)
+
+        exp_sample = pd.DataFrame({'Frequency': ['50.0', '25.0', '50.0'],
+                                   'No. of Associated Features':
+                                   ['2', '1', '2']},
+                                  index=['S1', 'S2', 'S3'])
+        exp_sample.index.name = "Sample ID"
+        obs_sample = results[1].view(Metadata).to_dataframe()
+        pd.testing.assert_frame_equal(exp_sample, obs_sample)
+
+    def test_no_samples(self):
+        table = biom.Table(np.array([[], []]),
+                           ['O1', 'O2'],
+                           [])
+        table = Artifact.import_data('FeatureTable[Frequency]', table)
+
+        # The expected message is matched by assertRaisesRegex itself:
+        # statements placed after the raising call would never execute.
+        with self.assertRaisesRegex(
+                ValueError, 'Cannot summarize a table with no samples'):
+            self.summarize_plus(table)
+
+    def test_no_features(self):
+        table = biom.Table(np.array([]),
+                           [],
+                           ['S1', 'S2', 'S3'])
+        table = Artifact.import_data('FeatureTable[Frequency]', table)
+
+        # Match the message via assertRaisesRegex, since nothing written
+        # after the raising call inside the ``with`` body can execute.
+        with self.assertRaisesRegex(
+                ValueError, 'Cannot summarize a table with no features'):
+            self.summarize_plus(table)
+
+    def test_all_zeros(self):
+        # Zero counts are still summarized; every tabulated value is zero.
+        table = biom.Table(np.array([[0, 0, 0], [0, 0, 0]]),
+                           ['O1', 'O2'],
+                           ['S1', 'S2', 'S3'])
+        table = Artifact.import_data('FeatureTable[Frequency]', table)
+        results = self.summarize_plus(table)
+
+        self.assertEqual(len(results), 3)
+        self.assertEqual(repr(results.feature_frequencies.type),
+                         'ImmutableMetadata')
+        self.assertEqual(repr(results.sample_frequencies.type),
+                         'ImmutableMetadata')
+        self.assertEqual(repr(results.summary.type),
+                         'Visualization')
+
+        exp_feature = pd.DataFrame({'Frequency': ['0.0', '0.0'],
+                                    'No. of Samples Observed In': ['0', '0']},
+                                   index=['O1', 'O2'])
+        exp_feature.index.name = "Feature ID"
+        obs_feature = results[0].view(Metadata).to_dataframe()
+        pd.testing.assert_frame_equal(exp_feature, obs_feature)
+
+        exp_sample = pd.DataFrame({'Frequency': ['0.0', '0.0', '0.0'],
+                                   'No. of Associated Features':
+                                   ['0', '0', '0']},
+                                  index=['S1', 'S2', 'S3'])
+        exp_sample.index.name = "Sample ID"
+        obs_sample = results[1].view(Metadata).to_dataframe()
+        pd.testing.assert_frame_equal(exp_sample, obs_sample)
+
+
 if __name__ == "__main__":
     main()



View it on GitLab: https://salsa.debian.org/med-team/q2-feature-table/-/commit/66136d9a5b3c89a1cb1e483f9d48ae0e09d942b2

-- 
View it on GitLab: https://salsa.debian.org/med-team/q2-feature-table/-/commit/66136d9a5b3c89a1cb1e483f9d48ae0e09d942b2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240218/1c79dd7e/attachment-0001.htm>


More information about the debian-med-commit mailing list