[med-svn] [Git][med-team/q2-feature-table][upstream] New upstream version 2024.2.0+dfsg
Andreas Tille (@tille)
gitlab at salsa.debian.org
Sun Feb 18 13:34:53 GMT 2024
Andreas Tille pushed to branch upstream at Debian Med / q2-feature-table
Commits:
66136d9a by Andreas Tille at 2024-02-18T13:01:50+01:00
New upstream version 2024.2.0+dfsg
- - - - -
18 changed files:
- .github/workflows/ci-dev.yaml
- README.md
- q2_feature_table/__init__.py
- q2_feature_table/_examples.py
- q2_feature_table/_filter.py
- q2_feature_table/_subsample.py → q2_feature_table/_subsample_ids.py
- q2_feature_table/_summarize/__init__.py
- q2_feature_table/_summarize/_visualizer.py
- q2_feature_table/_summarize/summarize_assets/index.html
- q2_feature_table/_summarize/tabulate_seqs_assets/index.html
- q2_feature_table/_version.py
- q2_feature_table/plugin_setup.py
- q2_feature_table/tests/filter/test_conditional_filter.py
- q2_feature_table/tests/filter/test_filter_features.py
- q2_feature_table/tests/filter/test_filter_samples.py
- q2_feature_table/tests/filter/test_filter_sequences.py
- q2_feature_table/tests/test_subsample.py
- q2_feature_table/tests/test_summarize.py
Changes:
=====================================
.github/workflows/ci-dev.yaml
=====================================
@@ -9,4 +9,4 @@ jobs:
ci:
uses: qiime2/distributions/.github/workflows/lib-ci-dev.yaml@dev
with:
- distro: core
\ No newline at end of file
+ distro: amplicon
=====================================
README.md
=====================================
@@ -1,5 +1,5 @@
# q2-feature-table
-![](https://github.com/qiime2/q2-feature-table/workflows/ci/badge.svg)
+![](https://github.com/qiime2/q2-feature-table/workflows/ci-dev/badge.svg)
This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
\ No newline at end of file
=====================================
q2_feature_table/__init__.py
=====================================
@@ -7,9 +7,10 @@
# ----------------------------------------------------------------------------
from ._normalize import rarefy
-from ._subsample import subsample
+from ._subsample_ids import subsample_ids
from ._transform import (presence_absence, relative_frequency, transpose)
-from ._summarize import (summarize, tabulate_seqs)
+from ._summarize import (summarize, tabulate_seqs, tabulate_sample_frequencies,
+ tabulate_feature_frequencies, summarize_plus)
from ._merge import (merge, merge_seqs, merge_taxa, overlap_methods)
from ._filter import (filter_samples, filter_features, filter_seqs,
filter_features_conditionally)
@@ -27,5 +28,7 @@ __all__ = ['rarefy', 'presence_absence', 'relative_frequency', 'transpose',
'summarize', 'merge', 'merge_seqs', 'filter_samples',
'filter_features', 'merge_taxa', 'tabulate_seqs', 'overlap_methods',
'core_features', 'group', 'heatmap', 'heatmap_choices',
- 'filter_seqs', 'subsample', 'rename_ids',
- 'filter_features_conditionally', 'split']
+ 'filter_seqs', 'subsample_ids', 'rename_ids',
+ 'filter_features_conditionally', 'split',
+ 'tabulate_feature_frequencies', 'tabulate_sample_frequencies',
+ 'summarize_plus']
=====================================
q2_feature_table/_examples.py
=====================================
@@ -388,3 +388,49 @@ def feature_table_tabulate_seqs_multi_taxon(use):
)
viz.assert_output_type('Visualization')
+
+
+def feature_table_tabulate_sample_freqs(use):
+ feature_table = use.init_artifact_from_url(
+ 'feature_table', moving_pics_ft_url
+ )
+
+ sample_frequencies, = use.action(
+ use.UsageAction('feature_table', 'tabulate_sample_frequencies'),
+ use.UsageInputs(table=feature_table),
+ use.UsageOutputNames(sample_frequencies='sample_frequencies')
+ )
+
+ sample_frequencies.assert_output_type('ImmutableMetadata')
+
+
+def feature_table_tabulate_feature_freqs(use):
+ feature_table = use.init_artifact_from_url(
+ 'feature_table', moving_pics_ft_url
+ )
+
+ feature_frequencies, = use.action(
+ use.UsageAction('feature_table', 'tabulate_feature_frequencies'),
+ use.UsageInputs(table=feature_table),
+ use.UsageOutputNames(feature_frequencies='feature_frequencies')
+ )
+
+ feature_frequencies.assert_output_type('ImmutableMetadata')
+
+
+def feature_table_summarize_plus(use):
+ feature_table = use.init_artifact_from_url(
+ 'feature_table', moving_pics_ft_url
+ )
+
+ feature_freqs, sample_freqs, viz, = use.action(
+ use.UsageAction('feature_table', 'summarize_plus'),
+ use.UsageInputs(table=feature_table),
+ use.UsageOutputNames(feature_frequencies='feature_frequencies',
+ sample_frequencies='sample_frequencies',
+ summary='visual summary')
+ )
+
+ feature_freqs.assert_output_type('ImmutableMetadata')
+ sample_freqs.assert_output_type('ImmutableMetadata')
+ viz.assert_output_type('Visualization')
=====================================
q2_feature_table/_filter.py
=====================================
@@ -12,6 +12,14 @@ import numpy as np
import pandas as pd
+def _validate_nonempty_table(table):
+ if table.is_empty():
+ raise ValueError("The resulting table is empty. This can happen if "
+ "you filter all samples or features out of the "
+ "table. Please check your filtering parameters and "
+ "try again.")
+
+
def _get_biom_filter_function(ids_to_keep, min_frequency, max_frequency,
min_nonzero, max_nonzero):
ids_to_keep = set(ids_to_keep)
@@ -32,7 +40,8 @@ _other_axis_map = {'sample': 'observation', 'observation': 'sample'}
def _filter_table(table, min_frequency, max_frequency, min_nonzero,
max_nonzero, metadata, where, axis, exclude_ids=False,
- filter_opposite_axis=True):
+ filter_opposite_axis=True,
+ allow_empty_table=False):
if min_frequency == 0 and max_frequency is None and min_nonzero == 0 and\
max_nonzero is None and metadata is None and where is None and\
exclude_ids is False:
@@ -62,20 +71,25 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero,
max_frequency=None, min_nonzero=1, max_nonzero=None)
table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True)
+ if not allow_empty_table:
+ _validate_nonempty_table(table)
+
def filter_samples(table: biom.Table, min_frequency: int = 0,
max_frequency: int = None, min_features: int = 0,
max_features: int = None,
metadata: qiime2.Metadata = None, where: str = None,
exclude_ids: bool = False,
- filter_empty_features: bool = True)\
+ filter_empty_features: bool = True,
+ allow_empty_table: bool = False)\
-> biom.Table:
_filter_table(table=table, min_frequency=min_frequency,
max_frequency=max_frequency, min_nonzero=min_features,
max_nonzero=max_features, metadata=metadata,
where=where, axis='sample', exclude_ids=exclude_ids,
- filter_opposite_axis=filter_empty_features)
-
+ filter_opposite_axis=filter_empty_features,
+ allow_empty_table=allow_empty_table
+ )
return table
@@ -84,20 +98,23 @@ def filter_features(table: biom.Table, min_frequency: int = 0,
max_samples: int = None,
metadata: qiime2.Metadata = None, where: str = None,
exclude_ids: bool = False,
- filter_empty_samples: bool = True)\
+ filter_empty_samples: bool = True,
+ allow_empty_table: bool = False)\
-> biom.Table:
_filter_table(table=table, min_frequency=min_frequency,
max_frequency=max_frequency, min_nonzero=min_samples,
max_nonzero=max_samples, metadata=metadata,
where=where, axis='observation', exclude_ids=exclude_ids,
- filter_opposite_axis=filter_empty_samples)
+ filter_opposite_axis=filter_empty_samples,
+ allow_empty_table=allow_empty_table)
return table
def filter_seqs(data: pd.Series, table: biom.Table = None,
metadata: qiime2.Metadata = None, where: str = None,
- exclude_ids: bool = False) -> pd.Series:
+ exclude_ids: bool = False,
+ ) -> pd.Series:
if table is not None and metadata is not None:
raise ValueError('Filtering with metadata and filtering with a table '
'are mutually exclusive.')
@@ -116,12 +133,14 @@ def filter_seqs(data: pd.Series, table: biom.Table = None,
filtered = data[data.index.isin(ids_to_keep)]
if filtered.empty is True:
raise ValueError('All features were filtered out of the data.')
+
return filtered
def filter_features_conditionally(table: biom.Table,
abundance: float,
prevalence: float,
+ allow_empty_table: bool = False
) -> biom.Table:
"""
A function to perform joint filtering because it makes life better
@@ -143,4 +162,7 @@ def filter_features_conditionally(table: biom.Table,
new_table = table.filter(filter_ids, axis='observation', inplace=False)
+ if not allow_empty_table:
+ _validate_nonempty_table(new_table)
+
return new_table
=====================================
q2_feature_table/_subsample.py → q2_feature_table/_subsample_ids.py
=====================================
@@ -9,8 +9,8 @@
import biom
-def subsample(table: biom.Table, subsampling_depth: int,
- axis: str) -> biom.Table:
+def subsample_ids(table: biom.Table, subsampling_depth: int,
+ axis: str) -> biom.Table:
if axis == 'feature':
# we are transposing the table due to biocore/biom-format#759
table = table.transpose()
=====================================
q2_feature_table/_summarize/__init__.py
=====================================
@@ -6,6 +6,12 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
-from ._visualizer import (summarize, tabulate_seqs)
+from ._visualizer import (summarize, tabulate_seqs,
+ tabulate_feature_frequencies,
+ tabulate_sample_frequencies,
+ summarize_plus)
-__all__ = ['summarize', 'tabulate_seqs']
+__all__ = ['summarize', 'tabulate_seqs',
+ 'tabulate_feature_frequencies',
+ 'tabulate_sample_frequencies',
+ 'summarize_plus']
=====================================
q2_feature_table/_summarize/_visualizer.py
=====================================
@@ -59,7 +59,7 @@ def tabulate_seqs(output_dir: str, data: DNAIterator,
metadata_df.index)
elif merge_method == 'strict':
if set(metadata_df.index) != display_sequences:
- raise Exception('Merge method is strict and IDs do not match')
+ raise ValueError('Merge method is strict and IDs do not match')
if taxonomy is not None:
for member in taxonomy.values():
if merge_method == 'union':
@@ -69,8 +69,8 @@ def tabulate_seqs(output_dir: str, data: DNAIterator,
member.index)
elif merge_method == 'strict':
if set(member.index) != display_sequences:
- raise Exception('Merge method is strict and IDs do not \
- match')
+ raise ValueError(
+ 'Merge method is strict and IDs do not match')
seq_len_stats = _compute_descriptive_stats(seq_lengths)
_write_tsvs_of_descriptive_stats(seq_len_stats, output_dir)
@@ -99,6 +99,7 @@ def summarize(output_dir: str, table: biom.Table,
sample_summary, sample_frequencies = _frequency_summary(
table, axis='sample')
+
if number_of_samples > 1:
# Calculate the bin count, with a minimum of 5 bins
@@ -166,13 +167,11 @@ def summarize(output_dir: str, table: biom.Table,
feature_qualitative_data = _compute_qualitative_summary(table)
sample_frequencies.sort_values(inplace=True, ascending=False)
+
+ sample_frequencies_json = pd.Series(["{:,}".format(int(x)) for x in
+ sample_frequencies])
+
feature_frequencies.sort_values(inplace=True, ascending=False)
- sample_frequencies.to_csv(
- os.path.join(output_dir, 'sample-frequency-detail.csv'),
- header=False)
- feature_frequencies.to_csv(
- os.path.join(output_dir, 'feature-frequency-detail.csv'),
- header=False)
feature_frequencies = feature_frequencies.astype(int) \
.apply('{:,}'.format).to_frame('Frequency')
@@ -196,7 +195,7 @@ def summarize(output_dir: str, table: biom.Table,
# Create a JSON object containing the Sample Frequencies to build the
# table in sample-frequency-detail.html
- sample_frequencies_json = sample_frequencies.to_json()
+ sample_frequencies_json = sample_frequencies_json.to_json()
templates = [index, sample_frequency_template, feature_frequency_template]
context.update({'frequencies_list':
@@ -217,6 +216,55 @@ def summarize(output_dir: str, table: biom.Table,
plt.close('all')
+def tabulate_feature_frequencies(table: biom.Table) -> qiime2.Metadata:
+ feature_frequencies = _frequencies(table, 'observation')
+ feature_frequencies = feature_frequencies.apply(
+ '{:,}'.format).to_frame('Frequency')
+ feature_qualitative_data = _compute_qualitative_summary(table)
+ samples_observed_in =\
+ pd.Series(feature_qualitative_data).astype(int).apply('{:,}'.format)
+ feature_frequencies["No. of Samples Observed In"] = samples_observed_in
+ feature_frequencies.index.name = "Feature ID"
+ return qiime2.Metadata(feature_frequencies)
+
+
+def tabulate_sample_frequencies(table: biom.Table) -> qiime2.Metadata:
+ sample_frequencies = _frequencies(table, 'sample')
+ sample_frequencies = sample_frequencies.apply(
+ '{:,}'.format).to_frame('Frequency')
+ sample_qualitative_data = _compute_qualitative_summary_sample(table)
+ samples_with_feature =\
+ pd.Series(sample_qualitative_data).astype(int).apply('{:,}'.format)
+ sample_frequencies["No. of Associated Features"] = samples_with_feature
+ sample_frequencies.index.name = "Sample ID"
+ return qiime2.Metadata(sample_frequencies)
+
+
+def summarize_plus(ctx, table, metadata=None):
+
+ try:
+ table_dimensions = table.view(pd.DataFrame).shape
+
+ except ValueError:
+ raise ValueError('Cannot summarize a table with no features')
+
+ if table_dimensions[0] == 0:
+ raise ValueError('Cannot summarize a table with no samples')
+
+ _feature_frequencies = ctx.get_action('feature_table',
+ 'tabulate_feature_frequencies')
+ _sample_frequencies = ctx.get_action('feature_table',
+ 'tabulate_sample_frequencies')
+ _visualizer = ctx.get_action('feature_table',
+ 'summarize')
+
+ feature_frequencies, = _feature_frequencies(table)
+ sample_frequencies, = _sample_frequencies(table)
+ summary, = _visualizer(table, metadata)
+
+ return feature_frequencies, sample_frequencies, summary
+
+
def _compute_descriptive_stats(lst: list):
"""Basic descriptive statistics and a (parametric) seven-number summary.
@@ -302,16 +350,32 @@ def _compute_qualitative_summary(table):
return sample_count
+def _compute_qualitative_summary_sample(table):
+ feature_count = {}
+ for count_vector, sample_id, _ in table.iter():
+ feature_count[sample_id] = (count_vector != 0).sum()
+ return feature_count
+
+
def _frequencies(table, axis):
return pd.Series(data=table.sum(axis=axis), index=table.ids(axis=axis))
-def _frequency_summary(table, axis='sample'):
+def _frequency_summary(table: biom.Table, axis='sample'):
frequencies = _frequencies(table, axis=axis)
- summary = pd.Series([frequencies.min(), frequencies.quantile(0.25),
- frequencies.median(), frequencies.quantile(0.75),
- frequencies.max(), frequencies.mean()],
+ first = frequencies.quantile(0.25)
+ third = frequencies.quantile(0.75)
+ _fst = round(first, 1)
+ _min = round(frequencies.min(), 1)
+ _thd = round(third, 1)
+ _med = round(frequencies.median(), 1)
+ _max = round(frequencies.max(), 1)
+ mean = round(frequencies.mean(), 1)
+
+ summary = pd.Series([_min, _fst,
+ _med, _thd,
+ _max, mean],
index=['Minimum frequency', '1st quartile',
'Median frequency', '3rd quartile',
'Maximum frequency', 'Mean frequency'])
=====================================
q2_feature_table/_summarize/summarize_assets/index.html
=====================================
@@ -37,10 +37,6 @@
<div class="row">
<div class="col-lg-6">
{{ sample_summary_table }}
- <p class="text-left">
- Frequency per sample detail (<a href="sample-frequency-detail.csv">csv</a> |
- <a href="sample-frequency-detail.html">html</a>)
- </p>
</div>
{% if number_of_samples > 1 %}
<div class="col-lg-6">
@@ -59,10 +55,6 @@
<div class="col-lg-6">
<h1>Frequency per feature</h1>
{{ feature_summary_table }}
- <p class="text-left">
- Frequency per feature detail (<a href="feature-frequency-detail.csv">csv</a> |
- <a href="feature-frequency-detail.html">html</a>)
- </p>
</div>
{% if number_of_features > 1 %}
<div class="col-lg-6">
=====================================
q2_feature_table/_summarize/tabulate_seqs_assets/index.html
=====================================
@@ -108,7 +108,7 @@
<tr>
<td>{{ sequence }}</td>
{% if sequence in data %}
- <td><samp><a target="_blank" href="{{ sequence.url }}" rel="noopener noreferrer">{{ data[sequence].seq }}</a></samp></td>
+ <td><samp><a target="_blank" href="{{ data[sequence].url }}" rel="noopener noreferrer">{{ data[sequence].seq }}</a></samp></td>
<td>{{ data[sequence].len }}</td>
{% else %}
<td>-</td>
@@ -116,7 +116,7 @@
{% endif %}
{% if taxonomy is defined %}
{% for member in taxonomy.values() %}
- {% if sequence in member.index %}
+ {% if sequence in member.index %}
<td>{{ member.loc[sequence, "Taxon"] }}</td>
{% else %}
<td>-</td>
=====================================
q2_feature_table/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2023.9.0, Release-2023.9)"
- git_full = "7cac3100e1bdc58f3d4c0b3790a3aa3973de4696"
- git_date = "2023-10-03 21:58:03 +0000"
+ git_refnames = " (tag: 2024.2.0, Release-2024.2)"
+ git_full = "8635d787a83854309dc6cb26afdc5dc5476db208"
+ git_date = "2024-02-16 21:57:35 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_feature_table/plugin_setup.py
=====================================
@@ -8,12 +8,14 @@
from qiime2.plugin import (Plugin, Int, Float, Range, Metadata, Str, Bool,
Choices, MetadataColumn, Categorical, List,
- Citations, TypeMatch, TypeMap, Collection)
+ Citations, TypeMatch, TypeMap, Collection,
+ Visualization)
from q2_types.feature_table import (
FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Composition)
from q2_types.feature_data import (
FeatureData, Sequence, Taxonomy, AlignedSequence)
+from q2_types.metadata import ImmutableMetadata
import q2_feature_table
import q2_feature_table._examples as ex
@@ -56,7 +58,7 @@ plugin.methods.register_function(
)
plugin.methods.register_function(
- function=q2_feature_table.subsample,
+ function=q2_feature_table.subsample_ids,
inputs={'table': FeatureTable[Frequency]},
parameters={'subsampling_depth': Int % Range(1, None),
'axis': Str % Choices(['sample', 'feature'])},
@@ -311,7 +313,8 @@ plugin.methods.register_function(
'metadata': Metadata,
'where': Str,
'exclude_ids': Bool,
- 'filter_empty_features': Bool},
+ 'filter_empty_features': Bool,
+ 'allow_empty_table': Bool},
outputs=[('filtered_table', FeatureTable[T1])],
input_descriptions={
'table': 'The feature table from which samples should be filtered.'
@@ -341,6 +344,9 @@ plugin.methods.register_function(
'table instead of being retained.',
'filter_empty_features': 'If true, features which are not present in '
'any retained samples are dropped.',
+ 'allow_empty_table': 'If true, the filtered table may be empty. '
+ 'Default behavior is to raise an error if the '
+ 'filtered table is empty.'
},
output_descriptions={
'filtered_table': 'The resulting feature table filtered by sample.'
@@ -366,8 +372,8 @@ plugin.methods.register_function(
function=q2_feature_table.filter_features_conditionally,
inputs={'table': FeatureTable[T1]},
parameters={'prevalence': Float % Range(0, 1),
- 'abundance': Float % Range(0, 1)
- },
+ 'abundance': Float % Range(0, 1),
+ 'allow_empty_table': Bool},
outputs=[('filtered_table', FeatureTable[T1])],
input_descriptions={
'table': 'The feature table from which features should be filtered.'
@@ -377,7 +383,10 @@ plugin.methods.register_function(
'retained.'),
'prevalence': ('The minimum portion of samples that a feature '
'must have a relative abundance of at least '
- '`abundance` to be retained.')
+ '`abundance` to be retained.'),
+ 'allow_empty_table': 'If true, the filtered table may be empty. '
+ 'Default behavior is to raise an error if the '
+ 'filtered table is empty.'
},
output_descriptions={
'filtered_table': 'The resulting feature table filtered by feature.'
@@ -406,7 +415,8 @@ plugin.methods.register_function(
'metadata': Metadata,
'where': Str,
'exclude_ids': Bool,
- 'filter_empty_samples': Bool},
+ 'filter_empty_samples': Bool,
+ 'allow_empty_table': Bool},
outputs=[('filtered_table', FeatureTable[Frequency])],
input_descriptions={
'table': 'The feature table from which features should be filtered.'
@@ -436,6 +446,9 @@ plugin.methods.register_function(
'table instead of being retained.',
'filter_empty_samples': 'If true, drop any samples where none of the '
'retained features are present.',
+ 'allow_empty_table': 'If true, the filtered table may be empty. '
+ 'Default behavior is to raise an error if the '
+ 'filtered table is empty.'
},
output_descriptions={
'filtered_table': 'The resulting feature table filtered by feature.'
@@ -498,8 +511,7 @@ plugin.methods.register_function(
plugin.visualizers.register_function(
function=q2_feature_table.summarize,
- inputs={'table': FeatureTable[Frequency | RelativeFrequency |
- PresenceAbsence]},
+ inputs={'table': FeatureTable[Frequency | PresenceAbsence]},
parameters={'sample_metadata': Metadata},
input_descriptions={'table': 'The feature table to be summarized.'},
parameter_descriptions={'sample_metadata': 'The sample metadata.'},
@@ -652,3 +664,62 @@ plugin.methods.register_function(
'splits are defined by values in metadata column.',
examples={}
)
+
+plugin.methods.register_function(
+ function=q2_feature_table.tabulate_feature_frequencies,
+ inputs={'table': FeatureTable[Frequency | PresenceAbsence |
+ RelativeFrequency]},
+ parameters={},
+ outputs={'feature_frequencies': ImmutableMetadata},
+ input_descriptions={
+ 'table': 'The input feature table.'
+ },
+ output_descriptions={
+ 'feature_frequencies': 'Per-sample and total frequencies per feature.'
+ },
+ name='Tabulate feature frequencies',
+ description='Tabulate sample count and total frequency per feature.',
+ examples={'feature_table_tabulate_feature_frequencies':
+ ex.feature_table_tabulate_feature_freqs}
+)
+
+plugin.methods.register_function(
+ function=q2_feature_table.tabulate_sample_frequencies,
+ inputs={'table': FeatureTable[Frequency | PresenceAbsence |
+ RelativeFrequency]},
+ parameters={},
+ outputs={'sample_frequencies': ImmutableMetadata},
+ input_descriptions={
+ 'table': 'The input feature table.'
+ },
+ output_descriptions={
+ 'sample_frequencies': 'Observed feature count and total' +
+ ' frequencies per sample.'
+ },
+ name='Tabulate sample frequencies',
+ description='Tabulate feature count and total frequency per sample.',
+ examples={'feature_table_tabulate_sample_frequencies':
+ ex.feature_table_tabulate_sample_freqs}
+)
+
+plugin.pipelines.register_function(
+ function=q2_feature_table.summarize_plus,
+ inputs={'table': FeatureTable[Frequency | PresenceAbsence]},
+ parameters={'metadata': Metadata},
+ outputs={'feature_frequencies': ImmutableMetadata,
+ 'sample_frequencies': ImmutableMetadata,
+ 'summary': Visualization},
+ input_descriptions={
+ 'table': 'The feature table to be summarized.'
+ },
+ parameter_descriptions={'metadata': 'The sample metadata.'},
+ output_descriptions={'feature_frequencies': 'Per-sample and total ' +
+ 'frequencies per feature.',
+ 'sample_frequencies': 'Observed feature count and ' +
+ 'total frequencies per sample.',
+ 'summary': 'Visual summary of feature table'},
+ name="Summarize table plus",
+ description="Generate visual and tabular summaries of a feature table. "
+ "Tabulate sample and feature frequencies.",
+ examples={'feature_table_summarize_plus': ex.feature_table_summarize_plus}
+)
=====================================
q2_feature_table/tests/filter/test_conditional_filter.py
=====================================
@@ -40,6 +40,36 @@ class TestConditional(unittest.TestCase):
npt.assert_array_equal(known.ids(axis='observation'),
test_.ids(axis='observation'))
+ def test_allow_empty_table_true(self):
+ table = biom.Table(
+ data=np.array([[0, 0, 10, 0, 0],
+ [250, 250, 140, 90, 150],
+ [250, 25, 100, 200, 100],
+ [0, 225, 250, 210, 250]]),
+ sample_ids=['A', 'B', 'C', 'D', 'E'],
+ observation_ids=['bat', 'cat', 'rat', 'a-tat-tat']
+ )
+ test_ = filter_features_conditionally(table,
+ prevalence=0.9,
+ abundance=0.9,
+ allow_empty_table=True)
+ self.assertTrue(test_.is_empty())
+
+ def test_allow_empty_table_false(self):
+ table = biom.Table(
+ data=np.array([[0, 0, 10, 0, 0],
+ [250, 250, 140, 90, 150],
+ [250, 25, 100, 200, 100],
+ [0, 225, 250, 210, 250]]),
+ sample_ids=['A', 'B', 'C', 'D', 'E'],
+ observation_ids=['bat', 'cat', 'rat', 'a-tat-tat']
+ )
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_features_conditionally(table,
+ prevalence=0.9,
+ abundance=0.9,
+ allow_empty_table=False)
+
if __name__ == "__main__":
unittest.main()
=====================================
q2_feature_table/tests/filter/test_filter_features.py
=====================================
@@ -45,11 +45,16 @@ class FilterFeaturesTests(unittest.TestCase):
['S1', 'S2', 'S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 1], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_features(table, min_frequency=5)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_features(table, min_frequency=5)
+
+ # filter all and allow empty table
+ actual = filter_features(table, min_frequency=5,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_filter_empty_samples(self):
@@ -64,12 +69,17 @@ class FilterFeaturesTests(unittest.TestCase):
['S1', 'S2', 'S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 1], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_features(table, min_frequency=5, filter_empty_samples=False)
+
+ # filter all and allow empty table
actual = filter_features(table, min_frequency=5,
- filter_empty_samples=False)
+ filter_empty_samples=False,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_feature_metadata(self):
@@ -99,13 +109,18 @@ class FilterFeaturesTests(unittest.TestCase):
['S2', 'S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all and raise ValueError
df = pd.DataFrame({}, index=pd.Index(['foo'], name='id'))
metadata = qiime2.Metadata(df)
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_features(table, metadata=metadata)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_features(table, metadata=metadata)
+
+ # filter all and allow empty table
+ actual = filter_features(table, metadata=metadata,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
# exclude one
@@ -122,15 +137,20 @@ class FilterFeaturesTests(unittest.TestCase):
['S1', 'S2', 'S3'])
self.assertEqual(actual, expected)
- # exclude all
+ # exclude all and raise ValueError
df = pd.DataFrame({'SequencedGenome': ['yes', 'yes']},
index=pd.Index(['O1', 'O2'], name='id'))
metadata = qiime2.Metadata(df)
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_features(table, metadata=metadata,
- exclude_ids=True)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_features(table, metadata=metadata, exclude_ids=True,
+ allow_empty_table=False)
+
+ # exclude all and allow empty table
+ actual = filter_features(table, metadata=metadata, exclude_ids=True,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_where(self):
@@ -162,7 +182,7 @@ class FilterFeaturesTests(unittest.TestCase):
['S2', 'S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all with ValueError
df = pd.DataFrame({'SequencedGenome': ['yes', 'no']},
index=pd.Index(['O1', 'O2'], name='feature-id'))
metadata = qiime2.Metadata(df)
@@ -170,7 +190,14 @@ class FilterFeaturesTests(unittest.TestCase):
['O1', 'O2'],
['S1', 'S2', 'S3'])
where = "SequencedGenome='yes' AND SequencedGenome='no'"
- actual = filter_features(table, metadata=metadata, where=where)
+
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_features(table, metadata=metadata, where=where,
+ allow_empty_table=False)
+
+ # Filter all and allow empty table
+ actual = filter_features(table, metadata=metadata, where=where,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
# filter one -> exclude one
=====================================
q2_feature_table/tests/filter/test_filter_samples.py
=====================================
@@ -64,11 +64,18 @@ class FilterSamplesTests(unittest.TestCase):
['S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_samples(table, min_frequency=42)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, min_frequency=42)
+
+ # filter all and allow empty table
+ table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+ ['S1', 'S2', 'S3'])
+ actual = filter_samples(table, min_frequency=42,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_max_frequency(self):
@@ -102,11 +109,17 @@ class FilterSamplesTests(unittest.TestCase):
['S1'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_samples(table, max_frequency=0)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, max_frequency=0)
+
+ # filter all and allow empty table
+ table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+ ['S1', 'S2', 'S3'])
+ actual = filter_samples(table, max_frequency=0, allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_filter_empty_features(self):
@@ -143,12 +156,20 @@ class FilterSamplesTests(unittest.TestCase):
['S1'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, max_frequency=0,
+ filter_empty_features=False)
+
+ # filter all and allow empty table
+ table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+ ['S1', 'S2', 'S3'])
actual = filter_samples(table, max_frequency=0,
- filter_empty_features=False)
+ filter_empty_features=False,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_min_features(self):
@@ -172,11 +193,17 @@ class FilterSamplesTests(unittest.TestCase):
['S2', 'S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_samples(table, min_features=3)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, min_features=3)
+
+ # filter all and allow empty table
+ table = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1-alt', 'O2-alt'],
+ ['S1', 'S2', 'S3'])
+ actual = filter_samples(table, min_features=3, allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_max_features(self):
@@ -200,11 +227,18 @@ class FilterSamplesTests(unittest.TestCase):
['S1'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_samples(table, max_features=0)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, max_features=0)
+
+ # filter all and allow empty table
+ table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
+ ['O1-alt', 'O2-alt'],
+ ['S1', 'S2', 'S3'])
+ actual = filter_samples(table, max_features=0, allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_sample_metadata(self):
@@ -236,13 +270,20 @@ class FilterSamplesTests(unittest.TestCase):
['S2', 'S3'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
df = pd.DataFrame({}, index=pd.Index(['foo'], name='id'))
metadata = qiime2.Metadata(df)
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- actual = filter_samples(table, metadata=metadata)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, metadata=metadata)
+
+ # filter all and allow empty table
+ df = pd.DataFrame({}, index=pd.Index(['foo'], name='id'))
+ metadata = qiime2.Metadata(df)
+ actual = filter_samples(table, metadata=metadata,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
# exclude none
@@ -284,13 +325,23 @@ class FilterSamplesTests(unittest.TestCase):
['S3'])
self.assertEqual(actual, expected)
- # exclude all
+ # exclude all raising ValueError
+ df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
+ 'SampleType': ['gut', 'tongue', 'gut']},
+ index=pd.Index(['S1', 'S2', 'S3'], name='id'))
+ metadata = qiime2.Metadata(df)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, metadata=metadata,
+ exclude_ids=True)
+
+ # exclude all and allow empty table
df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
'SampleType': ['gut', 'tongue', 'gut']},
index=pd.Index(['S1', 'S2', 'S3'], name='id'))
metadata = qiime2.Metadata(df)
actual = filter_samples(table, metadata=metadata,
- exclude_ids=True)
+ exclude_ids=True,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_sample_metadata_extra_ids(self):
@@ -354,7 +405,7 @@ class FilterSamplesTests(unittest.TestCase):
['S1'])
self.assertEqual(actual, expected)
- # filter all
+ # filter all raising ValueError
df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
'SampleType': ['gut', 'tongue', 'gut']},
index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
@@ -363,7 +414,12 @@ class FilterSamplesTests(unittest.TestCase):
['O1', 'O2'],
['S1', 'S2', 'S3'])
where = "Subject='subject-1' AND Subject='subject-2'"
- actual = filter_samples(table, metadata=metadata, where=where)
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table, metadata=metadata, where=where)
+
+ # filter all allowing empty table
+ actual = filter_samples(table, metadata=metadata, where=where,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
# filter none -> exclude none
@@ -417,7 +473,22 @@ class FilterSamplesTests(unittest.TestCase):
['S3'])
self.assertEqual(actual, expected)
- # filter all -> exclude all
+ # filter all -> exclude all raising ValueError
+ df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
+ 'SampleType': ['gut', 'tongue', 'gut']},
+ index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
+ metadata = qiime2.Metadata(df)
+ table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
+ ['O1', 'O2'],
+ ['S1', 'S2', 'S3'])
+ where = "Subject='subject-1' OR Subject='subject-2'"
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table,
+ metadata=metadata,
+ where=where,
+ exclude_ids=True)
+
+ # exclude all and allow empty table
df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
'SampleType': ['gut', 'tongue', 'gut']},
index=pd.Index(['S1', 'S2', 'S3'], name='#SampleID'))
@@ -429,7 +500,8 @@ class FilterSamplesTests(unittest.TestCase):
actual = filter_samples(table,
metadata=metadata,
where=where,
- exclude_ids=True)
+ exclude_ids=True,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_combine_id_and_frequency_filters(self):
@@ -618,7 +690,7 @@ class FilterSamplesTests(unittest.TestCase):
self.assertEqual(actual, expected)
# exclude one, min_frequency filter one,
- # max_frequency filter one
+ # max_frequency filter one raising ValueError
df = pd.DataFrame({'Subject': ['subject-1'],
'SampleType': ['gut']},
index=pd.Index(['S1'], name='id'))
@@ -626,16 +698,25 @@ class FilterSamplesTests(unittest.TestCase):
table = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table,
+ metadata=metadata,
+ exclude_ids=True,
+ max_frequency=4,
+ min_frequency=3)
+
+ # allow empty table
actual = filter_samples(table,
metadata=metadata,
exclude_ids=True,
max_frequency=4,
- min_frequency=3)
+ min_frequency=3,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
# where filter one -> exclude one,
# min_frequency filter one,
- # max_frequency filter one
+ # max_frequency filter one raising ValueError
df = pd.DataFrame({'Subject': ['subject-1', 'subject-2'],
'SampleType': ['gut', 'tongue']},
index=pd.Index(['S1', 'S2'], name='id'))
@@ -644,12 +725,22 @@ class FilterSamplesTests(unittest.TestCase):
['O1', 'O2'],
['S1', 'S2', 'S3'])
where = "Subject='subject-1'"
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table,
+ metadata=metadata,
+ exclude_ids=True,
+ where=where,
+ max_frequency=4,
+ min_frequency=3)
+
+ # allow empty table
actual = filter_samples(table,
metadata=metadata,
exclude_ids=True,
where=where,
max_frequency=4,
- min_frequency=3)
+ min_frequency=3,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
def test_combine_exclude_ids_and_features_filters(self):
@@ -773,7 +864,7 @@ class FilterSamplesTests(unittest.TestCase):
self.assertEqual(actual, expected)
# exclude one, max_features filter one,
- # min_features filter one
+ # min_features filter one raising ValueError
df = pd.DataFrame({'Subject': ['subject-1'],
'SampleType': ['gut']},
index=pd.Index(['S2'], name='id'))
@@ -781,16 +872,23 @@ class FilterSamplesTests(unittest.TestCase):
table = Table(np.array([[0, 1, 3], [0, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table,
+ metadata=metadata,
+ exclude_ids=True,
+ min_features=1,
+ max_features=1)
+ # allow empty table
actual = filter_samples(table,
metadata=metadata,
exclude_ids=True,
min_features=1,
- max_features=1)
- self.assertTrue(actual.is_empty())
+ max_features=1,
+ allow_empty_table=True)
# where filter one -> exclude one,
# max_features filter one,
- # min_features filter one
+ # min_features filter one raising ValueError
df = pd.DataFrame({'Subject': ['subject-1', 'subject-2'],
'SampleType': ['gut', 'tongue']},
index=pd.Index(['S1', 'S2'], name='id'))
@@ -799,12 +897,22 @@ class FilterSamplesTests(unittest.TestCase):
['O1', 'O2'],
['S1', 'S2', 'S3'])
where = "SampleType='tongue'"
+ with self.assertRaisesRegex(ValueError, 'table is empty'):
+ filter_samples(table,
+ metadata=metadata,
+ where=where,
+ exclude_ids=True,
+ min_features=1,
+ max_features=1)
+
+ # allow empty table
actual = filter_samples(table,
metadata=metadata,
where=where,
exclude_ids=True,
min_features=1,
- max_features=1)
+ max_features=1,
+ allow_empty_table=True)
self.assertTrue(actual.is_empty())
=====================================
q2_feature_table/tests/filter/test_filter_sequences.py
=====================================
@@ -31,8 +31,8 @@ class FilterSeqsTests(unittest.TestCase):
md_full.index.name = 'FeatureID'
self.md_full = qiime2.Metadata(md_full)
- def filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
- where=None):
+ def _filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
+ where=None):
if md is None:
md = self.md_full
obs = filter_seqs(self.seqs, metadata=md,
@@ -41,13 +41,13 @@ class FilterSeqsTests(unittest.TestCase):
def test_id_based_filtering(self):
# filter none
- self.filter_and_assertEqual(self.seqs,
- md=qiime2.Metadata(self.df_lite))
+ self._filter_and_assertEqual(self.seqs,
+ md=qiime2.Metadata(self.df_lite))
# filter one
md = qiime2.Metadata(self.df_lite.drop(['O1']))
exp = pd.Series(['GCTA', 'CCCC', 'TGTT'], index=['O2', 'O3', 'O4'])
- self.filter_and_assertEqual(exp, md=md)
+ self._filter_and_assertEqual(exp, md=md)
# filter all
md = qiime2.Metadata(pd.DataFrame({},
@@ -58,12 +58,12 @@ class FilterSeqsTests(unittest.TestCase):
# exclude none
md = qiime2.Metadata(pd.DataFrame({},
index=pd.Index(['foo'], name='id')))
- self.filter_and_assertEqual(self.seqs, md=md, exclude_ids=True)
+ self._filter_and_assertEqual(self.seqs, md=md, exclude_ids=True)
# exclude one
md = qiime2.Metadata(self.df_lite.drop(['O1', 'O2', 'O3']))
exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3'])
- self.filter_and_assertEqual(exp, md=md, exclude_ids=True)
+ self._filter_and_assertEqual(exp, md=md, exclude_ids=True)
# exclude all
md = qiime2.Metadata(self.df_lite)
@@ -74,17 +74,17 @@ class FilterSeqsTests(unittest.TestCase):
md = qiime2.Metadata(pd.DataFrame([],
index=pd.Index(['O1', 'O3', 'foo'], name='id')))
exp = pd.Series(['ACGT', 'CCCC'], index=['O1', 'O3'])
- self.filter_and_assertEqual(exp, md=md)
+ self._filter_and_assertEqual(exp, md=md)
def test_where_param(self):
# filter none
where = "stuff='foo' OR stuff='bar' OR stuff='baz'"
- self.filter_and_assertEqual(self.seqs, where=where)
+ self._filter_and_assertEqual(self.seqs, where=where)
# filter one
where = "stuff='foo' OR stuff='bar'"
exp = pd.Series(['ACGT', 'GCTA', 'TGTT'], index=['O1', 'O2', 'O4'])
- self.filter_and_assertEqual(exp, where=where)
+ self._filter_and_assertEqual(exp, where=where)
# filter all
where = "stuff='boo'"
@@ -93,12 +93,12 @@ class FilterSeqsTests(unittest.TestCase):
# exclude none
where = 'CAST(some_numbers AS INTEGER) < 0'
- self.filter_and_assertEqual(self.seqs, exclude_ids=True, where=where)
+ self._filter_and_assertEqual(self.seqs, exclude_ids=True, where=where)
# exclude one
where = 'CAST(some_numbers AS INTEGER) > 3'
exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3'])
- self.filter_and_assertEqual(exp, exclude_ids=True, where=where)
+ self._filter_and_assertEqual(exp, exclude_ids=True, where=where)
# exclude all
where = 'CAST(some_numbers AS INTEGER) BETWEEN 0 AND 5'
=====================================
q2_feature_table/tests/test_subsample.py
=====================================
@@ -12,16 +12,16 @@ import numpy as np
import numpy.testing as npt
from biom.table import Table
-from q2_feature_table import subsample
+from q2_feature_table import subsample_ids
-class SubsampleTests(TestCase):
+class SubsampleIDsTests(TestCase):
def test_subsample_samples(self):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
- a = subsample(t, 2, 'sample')
+ a = subsample_ids(t, 2, 'sample')
self.assertEqual(a.shape, (2, 2))
sample_ids = frozenset(a.ids(axis='sample'))
@@ -38,7 +38,7 @@ class SubsampleTests(TestCase):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
['O1', 'O2', 'O3'],
['S1', 'S2'])
- a = subsample(t, 2, 'feature')
+ a = subsample_ids(t, 2, 'feature')
self.assertEqual(a.shape, (2, 2))
sample_ids = frozenset(a.ids(axis='observation'))
@@ -56,28 +56,28 @@ class SubsampleTests(TestCase):
['O1', 'O2', 'O3'],
['S1', 'S2'])
with self.assertRaisesRegex(ValueError, "depth exceeds"):
- subsample(t, 10, 'sample')
+ subsample_ids(t, 10, 'sample')
def test_subsample_features_oversample(self):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
['O1', 'O2', 'O3'],
['S1', 'S2'])
with self.assertRaisesRegex(ValueError, "depth exceeds"):
- subsample(t, 10, 'feature')
+ subsample_ids(t, 10, 'feature')
def test_subsample_samples_empty(self):
t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
['O1', 'O2', 'O3'],
['S1', 'S2'])
with self.assertRaisesRegex(ValueError, "contains no"):
- subsample(t, 2, 'sample')
+ subsample_ids(t, 2, 'sample')
def test_subsample_features_empty(self):
t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
['O1', 'O2', 'O3'],
['S1', 'S2'])
with self.assertRaisesRegex(ValueError, "contains no"):
- subsample(t, 2, 'feature')
+ subsample_ids(t, 2, 'feature')
if __name__ == "__main__":
=====================================
q2_feature_table/tests/test_summarize.py
=====================================
@@ -16,9 +16,13 @@ import pandas as pd
import numpy as np
import qiime2
from q2_types.feature_data import DNAIterator
+from qiime2.plugin.testing import TestPluginBase
+from qiime2 import Artifact, Metadata
import csv
-from q2_feature_table import tabulate_seqs, summarize
+from q2_feature_table import (
+ tabulate_seqs, summarize,
+ tabulate_feature_frequencies, tabulate_sample_frequencies)
from q2_feature_table._summarize._visualizer import _compute_descriptive_stats
from q2_feature_table._summarize._visualizer import _frequencies
from q2_feature_table._summarize._vega_spec import vega_spec
@@ -379,16 +383,6 @@ class SummarizeTests(TestCase):
index_fp = os.path.join(output_dir, 'index.html')
self.assertTrue(os.path.exists(index_fp))
- feature_freq_fp = os.path.join(output_dir,
- 'feature-frequency-detail.csv')
- self.assertTrue(os.path.exists(feature_freq_fp))
- self.assertTrue('O1,4' in open(feature_freq_fp).read())
-
- sample_freq_fp = os.path.join(output_dir,
- 'sample-frequency-detail.csv')
- self.assertTrue(os.path.exists(sample_freq_fp))
- self.assertTrue('S1,1453' in open(sample_freq_fp).read())
-
def test_frequency_ranges_are_zero(self):
table = biom.Table(np.array([[25, 25, 25], [25, 25, 25]]),
['O1', 'O2'],
@@ -400,16 +394,6 @@ class SummarizeTests(TestCase):
index_fp = os.path.join(output_dir, 'index.html')
self.assertTrue(os.path.exists(index_fp))
- feature_freq_fp = os.path.join(output_dir,
- 'feature-frequency-detail.csv')
- self.assertTrue(os.path.exists(feature_freq_fp))
- self.assertTrue('O1,75' in open(feature_freq_fp).read())
-
- sample_freq_fp = os.path.join(output_dir,
- 'sample-frequency-detail.csv')
- self.assertTrue(os.path.exists(sample_freq_fp))
- self.assertTrue('S1,50' in open(sample_freq_fp).read())
-
def test_one_sample(self):
sample_frequencies_pdf_fn = 'sample-frequencies.pdf'
# sample-frequencies.pdf should not be written when there is only
@@ -471,16 +455,6 @@ class SummarizeTests(TestCase):
index_fp = os.path.join(output_dir, 'index.html')
self.assertTrue(os.path.exists(index_fp))
- feature_freq_fp = os.path.join(output_dir,
- 'feature-frequency-detail.csv')
- self.assertTrue(os.path.exists(feature_freq_fp))
- self.assertTrue('O1,4' in open(feature_freq_fp).read())
-
- sample_freq_fp = os.path.join(output_dir,
- 'sample-frequency-detail.csv')
- self.assertTrue(os.path.exists(sample_freq_fp))
- self.assertTrue('S1,1' in open(sample_freq_fp).read())
-
def test_vega_spec_data(self):
# test if metadata is converted correctly to vega compatible JSON
df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
@@ -516,5 +490,133 @@ class SummarizeTests(TestCase):
self.assertEqual(spec['data'][0]['values'], exp)
+class TabulateSampleFrequencyTests(TestCase):
+
+ def test_basic_case(self):
+ table = biom.Table(np.array([[0, 25, 25], [25, 25, 25]]),
+ ['O1', 'O2'],
+ ['S1', 'S2', 'S3'])
+ obs = tabulate_sample_frequencies(table).to_dataframe()
+
+ exp = pd.DataFrame({'Frequency': ['25.0', '50.0', '50.0'],
+ 'No. of Associated Features':
+ ['1', '2', '2']},
+ index=['S1', 'S2', 'S3'])
+ exp.index.name = 'Sample ID'
+ pd.testing.assert_frame_equal(exp, obs)
+
+
+class TabulateFeatureFrequencyTests(TestCase):
+
+ def test_basic_case(self):
+ table = biom.Table(np.array([[25, 25, 0], [25, 25, 25]]),
+ ['O1', 'O2'],
+ ['S1', 'S2', 'S3'])
+ obs = tabulate_feature_frequencies(table).to_dataframe()
+
+ exp = pd.DataFrame({'Frequency': ['50.0', '75.0'],
+ 'No. of Samples Observed In':
+ ['2', '3']},
+ index=['O1', 'O2'])
+ exp.index.name = 'Feature ID'
+ pd.testing.assert_frame_equal(exp, obs)
+
+
+class SummarizePlusTests(TestPluginBase):
+
+ package = 'q2_feature_table'
+
+ def setUp(self):
+ super().setUp()
+ self.summarize_plus = self.plugin.pipelines['summarize_plus']
+
+ def test_basic(self):
+ table = biom.Table(np.array([[25, 0, 25], [25, 25, 25]]),
+ ['O1', 'O2'],
+ ['S1', 'S2', 'S3'])
+ table = Artifact.import_data('FeatureTable[Frequency]', table)
+ results = self.summarize_plus(table)
+
+ self.assertEqual(len(results), 3)
+ self.assertEqual(repr(results.feature_frequencies.type),
+ 'ImmutableMetadata')
+ self.assertEqual(repr(results.sample_frequencies.type),
+ 'ImmutableMetadata')
+ self.assertEqual(repr(results.summary.type),
+ 'Visualization')
+
+ exp_feature = pd.DataFrame({'Frequency': ['50.0', '75.0'],
+ 'No. of Samples Observed In':
+ ['2', '3']},
+ index=['O1', 'O2'])
+ exp_feature.index.name = "Feature ID"
+ obs_feature = results[0].view(Metadata).to_dataframe()
+ pd.testing.assert_frame_equal(exp_feature, obs_feature)
+
+ exp_sample = pd.DataFrame({'Frequency': ['50.0', '25.0', '50.0'],
+ 'No. of Associated Features':
+ ['2', '1', '2']},
+ index=['S1', 'S2', 'S3'])
+ exp_sample.index.name = "Sample ID"
+ obs_sample = results[1].view(Metadata).to_dataframe()
+ pd.testing.assert_frame_equal(exp_sample, obs_sample)
+
+ def test_no_samples(self):
+ table = biom.Table(np.array([[], []]),
+ ['O1', 'O2'],
+ [])
+ table = Artifact.import_data('FeatureTable[Frequency]', table)
+
+ with self.assertRaises(ValueError) as context:
+ self.summarize_plus(table)
+
+ self.assertTrue('Cannot summarize a table with no samples' in
+ context.exception)
+
+ def test_no_features(self):
+ table = biom.Table(np.array([]),
+ [],
+ ['S1', 'S2', 'S3'])
+ table = Artifact.import_data('FeatureTable[Frequency]', table)
+
+ with self.assertRaises(ValueError) as context:
+
+ self.summarize_plus(table)
+
+ self.assertTrue('Cannot summarize a table with no features' in
+ context.exception)
+
+ def test_all_zeros(self):
+ table = biom.Table(np.array([[0, 0, 0], [0, 0, 0]]),
+ ['O1', 'O2'],
+ ['S1', 'S2', 'S3'])
+ table = Artifact.import_data('FeatureTable[Frequency]', table)
+ results = self.summarize_plus(table)
+
+ self.assertEqual(len(results), 3)
+ self.assertEqual(repr(results.feature_frequencies.type),
+ 'ImmutableMetadata')
+ self.assertEqual(repr(results.sample_frequencies.type),
+ 'ImmutableMetadata')
+ self.assertEqual(repr(results.summary.type),
+ 'Visualization')
+
+ exp_feature = pd.DataFrame({'Frequency': ['0.0', '0.0'],
+ 'No. of Samples Observed In':
+ ['0', '0']},
+ index=['O1', 'O2'])
+ exp_feature.index.name = "Feature ID"
+ obs_feature = results[0].view(Metadata).to_dataframe()
+ pd.testing.assert_frame_equal(exp_feature, obs_feature)
+
+ exp_sample = pd.DataFrame({'Frequency': ['0.0', '0.0', '0.0'],
+ 'No. of Associated Features':
+ ['0', '0', '0']},
+ index=['S1', 'S2', 'S3'])
+ exp_sample.index.name = "Sample ID"
+ obs_sample = results[1].view(Metadata).to_dataframe()
+ pd.testing.assert_frame_equal(exp_sample, obs_sample)
+
+
if __name__ == "__main__":
main()
View it on GitLab: https://salsa.debian.org/med-team/q2-feature-table/-/commit/66136d9a5b3c89a1cb1e483f9d48ae0e09d942b2
--
View it on GitLab: https://salsa.debian.org/med-team/q2-feature-table/-/commit/66136d9a5b3c89a1cb1e483f9d48ae0e09d942b2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240218/1c79dd7e/attachment-0001.htm>
More information about the debian-med-commit mailing list