[med-svn] [python-biom-format] 01/02: Imported Upstream version 2.1.5+dfsg
Andreas Tille
tille at debian.org
Mon Oct 26 21:33:18 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository python-biom-format.
commit bbda645b611f86d43e80b616d8481d1b05d474f0
Author: Andreas Tille <tille at debian.org>
Date: Mon Oct 26 22:32:58 2015 +0100
Imported Upstream version 2.1.5+dfsg
---
ChangeLog.md | 132 +-
MANIFEST.in | 1 -
PKG-INFO | 4 +-
README.md | 9 +-
biom/__init__.py | 17 +-
biom/_filter.pyx | 2 +-
biom/_subsample.pyx | 26 +-
biom/cli/__init__.py | 31 +
biom/cli/installation_informer.py | 121 +
biom/cli/metadata_adder.py | 186 ++
biom/cli/table_converter.py | 209 ++
biom/cli/table_head.py | 46 +
biom/cli/table_normalizer.py | 74 +
biom/cli/table_subsetter.py | 139 +
biom/cli/table_summarizer.py | 136 +
biom/{commands => cli}/table_validator.py | 121 +-
biom/cli/uc_processor.py | 85 +
biom/cli/util.py | 35 +
biom/commands/installation_informer.py | 123 -
biom/commands/metadata_adder.py | 165 -
biom/commands/table_converter.py | 221 --
biom/commands/table_subsetter.py | 122 -
biom/commands/table_summarizer.py | 159 -
biom/err.py | 1 -
biom/interfaces/__init__.py | 0
biom/interfaces/html/__init__.py | 0
biom/interfaces/html/config/__init__.py | 0
biom/interfaces/html/config/add_metadata.py | 99 -
biom/interfaces/html/config/convert.py | 74 -
biom/interfaces/html/config/show_install_info.py | 36 -
biom/interfaces/html/config/summarize_table.py | 53 -
biom/interfaces/html/config/validate_table.py | 57 -
biom/interfaces/html/input_handler.py | 56 -
biom/interfaces/optparse/__init__.py | 0
biom/interfaces/optparse/config/__init__.py | 0
biom/interfaces/optparse/config/add_metadata.py | 112 -
biom/interfaces/optparse/config/convert.py | 81 -
.../optparse/config/show_install_info.py | 39 -
biom/interfaces/optparse/config/subset_table.py | 77 -
biom/interfaces/optparse/config/summarize_table.py | 64 -
biom/interfaces/optparse/config/validate_table.py | 69 -
biom/interfaces/optparse/input_handler.py | 79 -
biom/interfaces/optparse/output_handler.py | 80 -
biom/parse.py | 166 +-
biom/table.py | 728 +++--
biom/util.py | 77 +-
doc/conf.py | 6 +-
doc/documentation/biom_conversion.rst | 2 +-
doc/documentation/generated/biom.load_table.rst | 6 -
.../generated/biom.table.Table.__eq__.rst | 8 -
.../generated/biom.table.Table.__format__.rst | 8 -
.../generated/biom.table.Table.__getitem__.rst | 8 -
.../generated/biom.table.Table.__init__.rst | 8 -
.../generated/biom.table.Table.__iter__.rst | 8 -
.../generated/biom.table.Table.__ne__.rst | 8 -
.../generated/biom.table.Table.__new__.rst | 8 -
.../generated/biom.table.Table.__reduce__.rst | 8 -
.../generated/biom.table.Table.__reduce_ex__.rst | 8 -
.../generated/biom.table.Table.__repr__.rst | 8 -
.../generated/biom.table.Table.__sizeof__.rst | 8 -
.../generated/biom.table.Table.__str__.rst | 8 -
.../biom.table.Table.__subclasshook__.rst | 8 -
.../generated/biom.table.Table._axis_to_num.rst | 8 -
.../generated/biom.table.Table._cast_metadata.rst | 8 -
.../biom.table.Table._conv_to_self_type.rst | 8 -
.../generated/biom.table.Table._data_equality.rst | 8 -
.../biom.table.Table._extract_data_from_tsv.rst | 8 -
.../generated/biom.table.Table._get_col.rst | 8 -
.../generated/biom.table.Table._get_row.rst | 8 -
.../biom.table.Table._get_sparse_data.rst | 8 -
.../generated/biom.table.Table._index.rst | 8 -
.../generated/biom.table.Table._index_ids.rst | 8 -
.../biom.table.Table._intersect_id_order.rst | 8 -
.../generated/biom.table.Table._invert_axis.rst | 8 -
.../generated/biom.table.Table._iter_obs.rst | 8 -
.../generated/biom.table.Table._iter_samp.rst | 8 -
.../generated/biom.table.Table._to_dense.rst | 8 -
.../generated/biom.table.Table._to_sparse.rst | 8 -
.../generated/biom.table.Table._union_id_order.rst | 8 -
.../biom.table.Table._verify_metadata.rst | 8 -
.../biom.table.Table.add_group_metadata.rst | 8 -
.../generated/biom.table.Table.add_metadata.rst | 8 -
.../generated/biom.table.Table.collapse.rst | 8 -
.../generated/biom.table.Table.copy.rst | 8 -
.../generated/biom.table.Table.data.rst | 8 -
.../generated/biom.table.Table.delimited_self.rst | 8 -
.../biom.table.Table.descriptive_equality.rst | 8 -
.../generated/biom.table.Table.dtype.rst | 8 -
.../generated/biom.table.Table.exists.rst | 8 -
.../generated/biom.table.Table.filter.rst | 8 -
.../generated/biom.table.Table.from_hdf5.rst | 8 -
.../generated/biom.table.Table.from_json.rst | 8 -
.../generated/biom.table.Table.from_tsv.rst | 8 -
.../biom.table.Table.get_table_density.rst | 8 -
.../biom.table.Table.get_value_by_ids.rst | 8 -
.../generated/biom.table.Table.group_metadata.rst | 8 -
.../generated/biom.table.Table.ids.rst | 8 -
.../generated/biom.table.Table.index.rst | 8 -
.../generated/biom.table.Table.is_empty.rst | 8 -
.../generated/biom.table.Table.iter.rst | 8 -
.../generated/biom.table.Table.iter_data.rst | 8 -
.../generated/biom.table.Table.iter_pairwise.rst | 8 -
.../generated/biom.table.Table.matrix_data.rst | 8 -
.../generated/biom.table.Table.max.rst | 8 -
.../generated/biom.table.Table.merge.rst | 8 -
.../generated/biom.table.Table.metadata.rst | 8 -
.../generated/biom.table.Table.min.rst | 8 -
.../generated/biom.table.Table.nnz.rst | 8 -
.../generated/biom.table.Table.nonzero.rst | 8 -
.../generated/biom.table.Table.nonzero_counts.rst | 8 -
.../generated/biom.table.Table.norm.rst | 8 -
.../generated/biom.table.Table.pa.rst | 8 -
.../generated/biom.table.Table.partition.rst | 8 -
.../generated/biom.table.Table.reduce.rst | 8 -
doc/documentation/generated/biom.table.Table.rst | 98 -
.../generated/biom.table.Table.shape.rst | 8 -
.../generated/biom.table.Table.sort.rst | 8 -
.../generated/biom.table.Table.sort_order.rst | 8 -
.../generated/biom.table.Table.subsample.rst | 8 -
.../generated/biom.table.Table.sum.rst | 8 -
.../generated/biom.table.Table.to_hdf5.rst | 8 -
.../generated/biom.table.Table.to_json.rst | 8 -
.../generated/biom.table.Table.to_tsv.rst | 8 -
.../generated/biom.table.Table.transform.rst | 8 -
.../generated/biom.table.Table.transpose.rst | 8 -
doc/index.rst | 88 +-
examples/asasd | 7 -
examples/asd.hdf5 | Bin 28920 -> 0 bytes
examples/asdasdsd | Bin 35720 -> 0 bytes
examples/bar_hdf5 | Bin 35720 -> 0 bytes
examples/bar_json | 1 -
examples/bar_tsv | 7 -
examples/foo_hdf5 | Bin 35720 -> 0 bytes
examples/foo_json | 1 -
examples/foo_tsv | 7 -
examples/foobar_hdf5 | Bin 33352 -> 0 bytes
examples/foobar_json | 1 -
examples/foobar_json2 | 1 -
examples/foobar_tsv | 7 -
examples/qweqweqwasd | 1 -
examples/rich_sparse_otu_table.biom.gz | Bin 735 -> 0 bytes
examples/wtf | Bin 35720 -> 0 bytes
examples/wtf2 | 1 -
scripts/biom | 22 -
scripts/serve-biom | 11 -
setup.cfg | 4 +-
setup.py | 41 +-
{biom/commands => tests}/__init__.py | 0
.../test_cli/__init__.py | 4 +-
tests/test_cli/test_add_metadata.py | 134 +
tests/test_cli/test_show_install_info.py | 24 +
tests/test_cli/test_subset_table.py | 122 +
tests/test_cli/test_summarize_table.py | 122 +
tests/test_cli/test_table_converter.py | 292 ++
tests/test_cli/test_table_normalizer.py | 50 +
tests/test_cli/test_uc_processor.py | 109 +
tests/test_cli/test_validate_table.py | 718 +++++
tests/test_err.py | 218 ++
tests/test_parse.py | 1692 ++++++++++
tests/test_table.py | 3376 ++++++++++++++++++++
tests/test_util.py | 384 +++
161 files changed, 9276 insertions(+), 2993 deletions(-)
diff --git a/ChangeLog.md b/ChangeLog.md
index 3b3f023..0eeec8e 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -1,18 +1,131 @@
BIOM-Format ChangeLog
=====================
+biom 2.1.5
+----------
+
+New features and bug fixes, released on 21 October 2015.
+
+Changes:
+
+* Codebase is now Python 2/3 compatible. It is currently tested with Python
+ versions 2.7, 3.4 and 3.5.
+* `biom-serve` and the accompanying html interface has been removed.
+
+New Features:
+
+* `Table.head` has been added to retrieve the first few rows and/or columns
+ from a table. This can be accessed through the new ``biom head`` command.
+ See [issue #639](https://github.com/biocore/biom-format/issues/639).
+* ``biom.parse.from_uc`` has been added to support creation of ``biom.Table``
+ objects from vsearch/uclust/usearch ``.uc`` files. This can be accessed
+ through the new ``biom from-uc`` command. See
+ [issue #648](https://github.com/biocore/biom-format/issues/648).
+* Codebase now uses [click](http://click.pocoo.org) instead of
+ [pyqi](https://github.com/biocore/pyqi) for its command line interface.
+ See [issue #631](https://github.com/biocore/biom-format/issues/631).
+
+Bug fixes:
+
+* `Table.update_ids` strict check was too aggressive. See
+ [issue #633](https://github.com/biocore/biom-format/issues/633).
+* `biom --version` now prints the software version (previously the individual
+ commands did this, but not the base command).
+
+biom 2.1.4
+----------
+
+Bug fixes, released on April 22nd 2015
+
+Changes:
+
+* Codebase updated to reflect pep8 1.6.x
+
+New features:
+
+* `Table.to_hdf5` and `Table.from_hdf5` now support custom parsers and
+ formatters, see issue #608
+
+Bug fixes:
+
+* `Table.update_ids` was not updating the internal ID lookup caches, issue #599
+* `--is-json` has been removed from the table validator as it was being ignored
+ anyway, issue #591
+* `biom summarize-table` can now properly interact with pipes. This previously
+ worked on OSX but did not on Linux. Issue #598
+* `biom convert` was recording the wrong version information from HDF5 -> JSON,
+ issue #595
+* `Table.collapse`, under `one_to_many` was not constructing the resulting
+ matrix properly, issue #606
+* Improve error message when trying to load an empty file, issue #614.
+* Improve error handling when filtering tables, and return tables of shape
+ `(0, n)` instead of `(0, 0)` when fully filtering out a table along an
+ axis, issue #620
+* Fix `Table.nonzero` to work on data that is not already in csr, issue #625.
+
+biom 2.1.3
+----------
+
+Minor fixes, released on January 29, 2015
+
+Bug fixes:
+
+* Improve error message when trying to load an HDF5 file without h5py being
+ installed.
+* Allow validating json files when h5py is not installed.
+
+biom 2.1.2
+----------
+
+Minor fixes, released on December 18, 2014
+
+Bug fixes:
+
+* Remove syntax error from `normalize_table.py`.
+* `Table.to_json` was not serializing empty tables properly, see #571
+* `biom summarize-table` could not handle empty tables, see #571
+
+biom 2.1.1
+----------
+
+Minor fixes and performance improvements, released on November 19th 2014
+
+Changes:
+
+* The collapsing function to `Table.collapse` is now passed the entire table to
+ allow for more complex collapses (e.g., median, random selection, etc). See
+ #544, #545 and #547.
+* Updated version strings in the project to be
+ [Semantic Versioning](www.semver.org)-style. This better matches with other
+ open source python projects, and plays nicer with pip.
+* Conversion from TSV now takes less memory. See #551.
+* Parameter header_mark has been removed from _extract_data_from_tsv()
+ in table.py
+* Order of magnitude improvement in parsing HDF5 BIOM tables, see #529
+* Added `Table.length`, see #548
+* Order of magnitude performance increase in `Table.nonzero`, see #538
+
+Bug fixes:
+
+* Ensure that a copy is performed in `Table.subsample`
+* Avoided a memory leak when checking if a table is JSON or TSV, see #552.
+
biom 2.1
--------
Format finalization, released on August 7th 2014
-New features:
+New features:
* Group metadata (e.g., a phylogenetic tree) can now be stored within the HDF5
representation. These data are available within the `Table` object
* Matrix data can now be accessed by the ``Table.matrix_data`` property
* ``Table`` IDs are now accessed via the ``Table.ids`` method
* ``Table`` metadata are now accessed via the ``Table.metadata`` method
+* New method ``Table.update_ids``, which allows for updating the ids along
+ either axis.
+* added ``normalize-table`` option to optparse and HTML interfaces which
+ utilizes the new TableNormalizer command from ``table_normalizer.py``
Changes:
@@ -28,11 +141,12 @@ Changes:
* iter methods now support dense/sparse
* added ``Table.matrix_data`` property
* ``Table.filter`` yields a sparse vector, see #470
-* ``Table.subsample`` can now sample by IDs (e.g., get a random subset of
+* ``Table.subsample`` can now sample by IDs (e.g., get a random subset of
samples or observations from a ``Table``).
-* ``biom.util.generate_subsamples`` will generate an infinite number of
+* ``biom.util.generate_subsamples`` will generate an infinite number of
subsamples and can be used for rarefaction.
* ``biom summarize-table`` can now operate on observations.
+* 10% performance boost in ``Table.subsample``, see #532
Bug fixes:
@@ -78,10 +192,10 @@ Changes:
been combined into `Table.partition`, which takes an axis argument
* `Table.collapse_samples_by_metadata` and
`Table.collapse_observations_by_metadata` have been combined into
- `Table.collapse`, which now takes an axis argument
-* `Table.filter_samples` and `Table.filter_observations` have been combined
- into `Table.filter`, which now takes an axis argument
-* `Table.transform_samples` and `Table.transform_observations` have been
+ `Table.collapse`, which now takes an axis argument
+* `Table.filter_samples` and `Table.filter_observations` have been combined
+ into `Table.filter`, which now takes an axis argument
+* `Table.transform_samples` and `Table.transform_observations` have been
combined into `Table.transform`, which now takes an axis argument
* `Table.norm_sample_by_observation` and `Table.norm_observation_by_sample`
have been combined into `Table.norm`, which now takes an axis argument
@@ -149,8 +263,8 @@ New Features:
* ```Table.delimitedSelf``` now has an additional argument, ```observation_column_name```, which allows the user to specify the name of the first column in the output table (e.g. 'OTU ID', 'Taxon', etc.).
* Added new ```Table.transpose``` method.
* ```Table.__init``` has change from ```__init__(self, data, sample_ids, observation_ids, sample_metadata=None,
-observation_metadata=None, table_id=None, type=None, **kwargs)``` to ```__init__(self, data, observation_ids, sample_ids, observation_metadata=None, sample_metadata=None, table_id=None, type=None, **kwargs)``` This is for clarity, the data is in the same order as the arguments to the constructor.
-*```table_factory``` has changed from ```table_factory(data, sample_ids, observation_ids, sample_metadata=None, observation_metadata=None, table_id=None, input_is_dense=False, transpose=False, **kwargs)``` to ```table_factory(data, observation_ids, sample_ids, observation_metadata=None, sample_metadata=None, table_id=None, input_is_dense=False, transpose=False, **kwargs)``` This is for clarity, the data is in the same order as the arguments to the function.
+observation_metadata=None, table_id=None, type=None, **kwargs)``` to ```__init__(self, data, observation_ids, sample_ids, observation_metadata=None, sample_metadata=None, table_id=None, type=None, **kwargs)``` This is for clarity, the data is in the same order as the arguments to the constructor.
+*```table_factory``` has changed from ```table_factory(data, sample_ids, observation_ids, sample_metadata=None, observation_metadata=None, table_id=None, input_is_dense=False, transpose=False, **kwargs)``` to ```table_factory(data, observation_ids, sample_ids, observation_metadata=None, sample_metadata=None, table_id=None, input_is_dense=False, transpose=False, **kwargs)``` This is for clarity, the data is in the same order as the arguments to the function.
Changes:
diff --git a/MANIFEST.in b/MANIFEST.in
index 5e27d9a..7f908c9 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,7 +5,6 @@ include ChangeLog.md
graft biom
graft support_files
graft examples
-graft scripts
graft doc
prune docs/_build
diff --git a/PKG-INFO b/PKG-INFO
index 244a02d..6bcbffe 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 1.0
+Metadata-Version: 1.1
Name: biom-format
-Version: 2.1
+Version: 2.1.5
Summary: Biological Observation Matrix (BIOM) format
Home-page: http://www.biom-format.org
Author: Daniel McDonald
diff --git a/README.md b/README.md
index 5db966b..6aba07f 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,15 @@
README
======
-[](https://travis-ci.org/biocore/biom-format) [](https://coveralls.io/r/biocore/biom-format)
+[](https://gitter.im/biocore/biom-format?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-**BIOM 2.1 with metadata structure updates are coming on June 30th**
+[](https://travis-ci.org/biocore/biom-format) [](https://coveralls.io/r/biocore/biom-format)
The BIOM file format (canonically pronounced *biome*) is designed to be a general-use format for representing counts of observations (e.g., OTUs, KO categories, lipid types) in one or more biological samples (e.g., microbiome samples, genomes, metagenomes).
Further details can be found at http://biom-format.org.
+
+Getting help
+------------
+
+To get help with biom, you should use the [biom](http://stackoverflow.com/questions/tagged/biom) tag on StackOverflow (SO), or post to the [QIIME Forum](http://forum.qiime.org). Before posting a question, check out SO's guide on how to [ask a question](http://stackoverflow.com/questions/how-to-ask). The biom-format developers regularly monitor the `biom` SO tag.
diff --git a/biom/__init__.py b/biom/__init__.py
old mode 100644
new mode 100755
index 20a92c9..388709c
--- a/biom/__init__.py
+++ b/biom/__init__.py
@@ -41,28 +41,28 @@ either in TSV, HDF5, JSON, gzip'd JSON or gzip'd TSV and parse accordingly:
"""
# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
+from .table import Table
+from .parse import parse_biom_table as parse_table, load_table
+from .util import __format_version__, __version__
+
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Greg Caporaso",
"Jose Clemente", "Justin Kuczynski", "Antonio Gonzalez",
"Yoshiki Vazquez Baeza", "Jose Navas", "Adam Robbins-Pianka",
- "Rob Knight", "Joshua Shorenstein", "Emily TerAvest"]
+ "Rob Knight", "Joshua Shorenstein", "Emily TerAvest",
+ "Michael Shaffer"]
__license__ = "BSD"
__url__ = "http://biom-format.org"
-__version__ = "2.1"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald at colorado.edu"
-__format_version__ = (2, 1)
-
-from .table import Table
-from .parse import parse_biom_table as parse_table, load_table
example_table = Table([[0, 1, 2], [3, 4, 5]], ['O1', 'O2'],
['S1', 'S2', 'S3'],
@@ -73,4 +73,5 @@ example_table = Table([[0, 1, 2], [3, 4, 5]], ['O1', 'O2'],
{'environment': 'A'}], input_is_dense=True)
-__all__ = ['Table', 'example_table', 'parse_table', 'load_table']
+__all__ = ['Table', 'example_table', 'parse_table', 'load_table',
+ '__format_version__', '__version__']
diff --git a/biom/_filter.pyx b/biom/_filter.pyx
index 21b426e..e5ebc10 100644
--- a/biom/_filter.pyx
+++ b/biom/_filter.pyx
@@ -84,7 +84,7 @@ cdef _remove_rows_csr(arr, cnp.ndarray[cnp.uint8_t, ndim=1] booleans):
arr.data = data[:nnz]
arr.indices = indices[:nnz]
arr.indptr = indptr[:m-offset_rows+1]
- arr._shape = (m - offset_rows, n) if m-offset_rows else (0, 0)
+ arr._shape = (m - offset_rows, n)
def _filter(arr, ids, metadata, index, ids_to_keep, axis, invert):
"""Filter row/columns of a sparse matrix according to the output of a
diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx
index 4680aee..71350e9 100644
--- a/biom/_subsample.pyx
+++ b/biom/_subsample.pyx
@@ -1,10 +1,10 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2013--, biom development team.
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
from __future__ import division
@@ -35,34 +35,28 @@ def _subsample(arr, n):
cdef:
cnp.int64_t counts_sum
cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data
+ cnp.ndarray[cnp.int32_t, ndim=1] data_i = arr.data.astype(np.int32)
cnp.ndarray[cnp.float64_t, ndim=1] result
cnp.ndarray[cnp.int32_t, ndim=1] indices = arr.indices
cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr
- cnp.ndarray[cnp.int32_t, ndim=1] permuted, unpacked
+ cnp.ndarray[cnp.int32_t, ndim=1] permuted, unpacked, r
cnp.float64_t cnt
- Py_ssize_t unpacked_idx, i, j
+ Py_ssize_t i, j, length
for i in range(indptr.shape[0] - 1):
start, end = indptr[i], indptr[i+1]
+ length = end - start
counts_sum = data[start:end].sum()
if counts_sum < n:
data[start:end] = 0
continue
- unpacked = np.empty(counts_sum, dtype=np.int32)
- unpacked_idx = 0
-
- for i in range(start, end):
- cnt = data[i]
-
- for j in range(int(cnt)):
- unpacked[unpacked_idx] = i - start
- unpacked_idx += 1
-
+ r = np.arange(length, dtype=np.int32)
+ unpacked = np.repeat(r, data_i[start:end])
permuted = np.random.permutation(unpacked)[:n]
- result = np.zeros(end - start, dtype=np.float64)
+ result = np.zeros(length, dtype=np.float64)
for idx in range(permuted.shape[0]):
result[permuted[idx]] += 1
diff --git a/biom/cli/__init__.py b/biom/cli/__init__.py
new file mode 100644
index 0000000..1cbc08c
--- /dev/null
+++ b/biom/cli/__init__.py
@@ -0,0 +1,31 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from __future__ import division
+
+from importlib import import_module
+
+import click
+import biom
+
+
+ at click.group()
+ at click.version_option(version=biom.__version__)
+def cli():
+ pass
+
+
+import_module('biom.cli.table_summarizer')
+import_module('biom.cli.metadata_adder')
+import_module('biom.cli.table_converter')
+import_module('biom.cli.installation_informer')
+import_module('biom.cli.table_subsetter')
+import_module('biom.cli.table_normalizer')
+import_module('biom.cli.table_head')
+import_module('biom.cli.table_validator')
+import_module('biom.cli.uc_processor')
diff --git a/biom/cli/installation_informer.py b/biom/cli/installation_informer.py
new file mode 100644
index 0000000..9e3a999
--- /dev/null
+++ b/biom/cli/installation_informer.py
@@ -0,0 +1,121 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from __future__ import division
+
+import sys
+
+import click
+
+from biom.cli import cli
+
+
+ at cli.command(name='show-install-info')
+def show_install_info():
+ """Provide information about the biom-format installation.
+
+ Provide information about the biom-format installation, including settings
+ pulled from the configuration file. For more details, see
+ http://biom-format.org
+
+ Example usage:
+
+ Display biom-format installation information:
+
+ $ biom show-install-info
+
+ """
+ click.echo(_show_install_info())
+
+
+def _show_install_info():
+ lines = []
+ lines.extend(_get_formatted_system_info())
+ lines.extend(_get_formatted_dependency_version_info())
+ lines.extend(_get_formatted_package_info())
+ lines.append('')
+ return '\n'.join(lines)
+
+
+def _get_formatted_system_info():
+ return _format_info(_get_system_info(), 'System information')
+
+
+def _get_formatted_dependency_version_info():
+ return _format_info(_get_dependency_version_info(), 'Dependency versions')
+
+
+def _get_formatted_package_info():
+ return _format_info(_get_package_info(), 'biom-format package information')
+
+
+def _get_system_info():
+ return (("Platform", sys.platform),
+ ("Python version", sys.version.replace('\n', ' ')),
+ ("Python executable", sys.executable))
+
+
+def _get_dependency_version_info():
+ not_installed_msg = "Not installed"
+
+ try:
+ from click import __version__ as click_lib_version
+ except ImportError:
+ click_lib_version = not_installed_msg
+
+ try:
+ from numpy import __version__ as numpy_lib_version
+ except ImportError:
+ numpy_lib_version = ("ERROR: Not installed - this is required! "
+ "(This will also cause the BIOM library to "
+ "not be importable.)")
+
+ try:
+ from scipy import __version__ as scipy_lib_version
+ except ImportError:
+ scipy_lib_version = not_installed_msg
+
+ try:
+ from h5py import __version__ as h5py_lib_version
+ except ImportError:
+ h5py_lib_version = ("WARNING: Not installed - this is an optional "
+ "dependency. It is strongly recommended for "
+ "large datasets.")
+
+ return (("click version", click_lib_version),
+ ("NumPy version", numpy_lib_version),
+ ("SciPy version", scipy_lib_version),
+ ("h5py version", h5py_lib_version))
+
+
+def _get_package_info():
+ import_error_msg = ("ERROR: Can't find the BIOM library code (or "
+ "numpy) - is it installed and in your "
+ "$PYTHONPATH?")
+ try:
+ from biom import __version__ as biom_lib_version
+ except ImportError:
+ biom_lib_version = import_error_msg
+
+ return (("biom-format version", biom_lib_version),)
+
+
+def _format_info(info, title):
+ max_len = _get_max_length(info)
+
+ lines = ['']
+ lines.append(title)
+ lines.append('=' * len(title))
+ for e in info:
+ lines.append("%*s:\t%s" % (max_len, e[0], e[1]))
+
+ return lines
+
+
+def _get_max_length(info):
+ return max([len(e[0]) for e in info])
diff --git a/biom/cli/metadata_adder.py b/biom/cli/metadata_adder.py
new file mode 100644
index 0000000..96e2660
--- /dev/null
+++ b/biom/cli/metadata_adder.py
@@ -0,0 +1,186 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from __future__ import division
+
+import click
+
+from biom import load_table
+from biom.cli import cli
+from biom.cli.util import write_biom_table
+from biom.parse import MetadataMap
+from biom.util import HAVE_H5PY
+
+
+ at cli.command(name='add-metadata')
+ at click.option('-i', '--input-fp', required=True,
+ type=click.Path(exists=True, dir_okay=False),
+ help='The input BIOM table')
+ at click.option('-o', '--output-fp', required=True,
+ type=click.Path(exists=False, dir_okay=False),
+ help='The output BIOM table')
+ at click.option('-m', '--sample-metadata-fp', required=False,
+ type=click.Path(exists=True, dir_okay=False),
+ help='The sample metadata mapping file (will add sample '
+ 'metadata to the input BIOM table, if provided).')
+ at click.option('--observation-metadata-fp', required=False,
+ type=click.Path(exists=True, dir_okay=False),
+ help='The observation metadata mapping file (will add '
+ 'observation metadata to the input BIOM table, if '
+ 'provided).')
+ at click.option('--sc-separated', required=False, type=click.STRING,
+ help='Comma-separated list of the metadata fields to split '
+ 'on semicolons. This is useful for hierarchical data such '
+ 'as taxonomy or functional categories.')
+ at click.option('--sc-pipe-separated', required=False, type=click.STRING,
+ help='Comma-separated list of the metadata fields to split '
+ 'on semicolons and pipes ("|"). This is useful for '
+ 'hierarchical data such as functional categories with '
+ 'one-to-many mappings (e.g. x;y;z|x;y;w)).')
+ at click.option('--int-fields', required=False, type=click.STRING,
+ help='Comma-separated list of the metadata fields to cast '
+ 'to integers. This is useful for integer data such as '
+ '"DaysSinceStart".')
+ at click.option('--float-fields', required=False, type=click.STRING,
+ help='Comma-separated list of the metadata fields to cast '
+ 'to floating point numbers. This is useful for real number '
+ 'data such as "pH".')
+ at click.option('--sample-header', required=False, type=click.STRING,
+ help='Comma-separated list of the sample metadata field '
+ 'names. This is useful if a header line is not provided '
+ 'with the metadata, if you want to rename the fields, or '
+ 'if you want to include only the first n fields where n is '
+ 'the number of entries provided here.')
+ at click.option('--observation-header', required=False, type=click.STRING,
+ help='Comma-separated list of the observation metadata '
+ 'field names. This is useful if a header line is not '
+ 'provided with the metadata, if you want to rename the '
+ 'fields, or if you want to include only the first n fields '
+ 'where n is the number of entries provided here.')
+ at click.option('--output-as-json', default=not HAVE_H5PY, is_flag=True,
+ help='Write the output file in JSON format.')
+def add_metadata(input_fp, output_fp, sample_metadata_fp,
+ observation_metadata_fp, sc_separated, sc_pipe_separated,
+ int_fields, float_fields, sample_header, observation_header,
+ output_as_json):
+ """Add metadata to a BIOM table.
+
+ Add sample and/or observation metadata to BIOM-formatted files. See
+ examples here: http://biom-format.org/documentation/adding_metadata.html
+
+ Example usage:
+
+ Add sample metadata to a BIOM table:
+
+ $ biom add-metadata -i otu_table.biom -o table_with_sample_metadata.biom
+ -m sample_metadata.txt
+ """
+ table = load_table(input_fp)
+ if sample_metadata_fp is not None:
+ sample_metadata_f = open(sample_metadata_fp, 'U')
+ else:
+ sample_metadata_f = None
+ if observation_metadata_fp is not None:
+ observation_metadata_f = open(observation_metadata_fp, 'U')
+ else:
+ observation_metadata_f = None
+ if sc_separated is not None:
+ sc_separated = sc_separated.split(',')
+ if sc_pipe_separated is not None:
+ sc_pipe_separated = sc_pipe_separated.split(',')
+ if int_fields is not None:
+ int_fields = int_fields.split(',')
+ if float_fields is not None:
+ float_fields = float_fields.split(',')
+ if sample_header is not None:
+ sample_header = sample_header.split(',')
+ if observation_header is not None:
+ observation_header = observation_header.split(',')
+
+ result = _add_metadata(table, sample_metadata_f, observation_metadata_f,
+ sc_separated, sc_pipe_separated, int_fields,
+ float_fields, sample_header, observation_header)
+
+ if output_as_json:
+ fmt = 'json'
+ else:
+ fmt = 'hdf5'
+
+ write_biom_table(result, fmt, output_fp)
+
+
+def _split_on_semicolons(x):
+ return [e.strip() for e in x.split(';')]
+
+
+def _split_on_semicolons_and_pipes(x):
+ return [[e.strip() for e in y.split(';')] for y in x.split('|')]
+
+
+def _int(x):
+ try:
+ return int(x)
+ except ValueError:
+ return x
+
+
+def _float(x):
+ try:
+ return float(x)
+ except ValueError:
+ return x
+
+
+def _add_metadata(table, sample_metadata=None, observation_metadata=None,
+ sc_separated=None, sc_pipe_separated=None, int_fields=None,
+ float_fields=None, sample_header=None,
+ observation_header=None):
+
+ if sample_metadata is None and observation_metadata is None:
+ raise ValueError('Must specify sample_metadata and/or '
+ 'observation_metadata.')
+
+ # define metadata processing functions, if any
+ process_fns = {}
+ if sc_separated is not None:
+ process_fns.update(dict.fromkeys(sc_separated,
+ _split_on_semicolons))
+
+ if sc_pipe_separated is not None:
+ process_fns.update(dict.fromkeys(sc_pipe_separated,
+ _split_on_semicolons_and_pipes))
+
+ if int_fields is not None:
+ process_fns.update(dict.fromkeys(int_fields, _int))
+
+ if float_fields is not None:
+ process_fns.update(dict.fromkeys(float_fields, _float))
+
+ # parse mapping files
+ if sample_metadata is not None:
+ sample_metadata = MetadataMap.from_file(sample_metadata,
+ process_fns=process_fns,
+ header=sample_header)
+
+ if observation_metadata is not None:
+ observation_metadata = MetadataMap.from_file(
+ observation_metadata,
+ process_fns=process_fns,
+ header=observation_header)
+
+ # NAUGHTY: this is modifying the input table IN PLACE!!! And then
+ # RETURNING IT! MetadataAdder is angry!
+
+ # add metadata as necessary
+ if sample_metadata:
+ table.add_metadata(sample_metadata, axis='sample')
+
+ if observation_metadata:
+ table.add_metadata(observation_metadata, axis='observation')
+
+ return table
diff --git a/biom/cli/table_converter.py b/biom/cli/table_converter.py
new file mode 100644
index 0000000..2506e0d
--- /dev/null
+++ b/biom/cli/table_converter.py
@@ -0,0 +1,209 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from __future__ import division
+
+import click
+
+from biom import load_table
+from biom.cli import cli
+from biom.cli.util import write_biom_table
+from biom.parse import MetadataMap
+
+
# Recognized values for the BIOM "type" field; "Table" is the generic
# fallback used when no specific type applies.
table_types = ["OTU table",
               "Pathway table",
               "Function table",
               "Ortholog table",
               "Gene table",
               "Metabolite table",
               "Taxon table",
               "Table"]

# Parsers applied to observation metadata values when converting from a
# classic (TSV) table: 'sc_separated' splits on ';' and strips whitespace,
# 'naive' leaves the value untouched.
observation_metadata_types = {
    'sc_separated': lambda x: [e.strip() for e in x.split(';')],
    'naive': lambda x: x
}
# 'taxonomy' is an alias for semicolon-separated parsing.
observation_metadata_types['taxonomy'] = \
    observation_metadata_types['sc_separated']

# Formatters used to render observation metadata when writing TSV output.
observation_metadata_formatters = {
    'sc_separated': lambda x: '; '.join(x),
    'naive': lambda x: x
}
+
+
@cli.command(name='convert')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM table')
@click.option('-o', '--output-fp', required=True,
              type=click.Path(exists=False, dir_okay=False),
              help='The output BIOM table')
@click.option('-m', '--sample-metadata-fp', required=False,
              type=click.Path(exists=True, dir_okay=False),
              help='The sample metadata mapping file (will add sample '
                   'metadata to the input BIOM table, if provided).')
@click.option('--observation-metadata-fp', required=False,
              type=click.Path(exists=True, dir_okay=False),
              help='The observation metadata mapping file (will add '
                   'observation metadata to the input BIOM table, if '
                   'provided).')
@click.option('--to-json', default=False, is_flag=True,
              help='Output as JSON-formatted table.')
@click.option('--to-hdf5', default=False, is_flag=True,
              help='Output as HDF5-formatted table.')
@click.option('--to-tsv', default=False, is_flag=True,
              help='Output as TSV-formatted (classic) table.')
@click.option('--collapsed-samples', default=False, is_flag=True,
              help='If --to-hdf5 is passed and the original table is a '
                   'BIOM table with collapsed samples, this will '
                   'update the sample metadata of the table to '
                   'the supported HDF5 collapsed format.')
@click.option('--collapsed-observations', default=False, is_flag=True,
              help='If --to-hdf5 is passed and the original table is a '
                   'BIOM table with collapsed observations, this will '
                   'update the observation metadata of the table '
                   'to the supported HDF5 collapsed format.')
@click.option('--header-key', required=False, type=click.STRING,
              help='The observation metadata to include from the input '
                   'BIOM table file when creating a tsv table file. '
                   'By default no observation metadata will be included.')
@click.option('--output-metadata-id', required=False, type=click.STRING,
              help='The name to be given to the observation metadata '
                   'column when creating a tsv table file if the column '
                   'should be renamed.')
@click.option('--table-type', required=False,
              type=click.Choice(table_types),
              help='The type of the table.')
@click.option('--process-obs-metadata', required=False,
              type=click.Choice(
                  observation_metadata_types),
              help='Process metadata associated with observations when '
                   'converting from a classic table.')
@click.option('--tsv-metadata-formatter', required=False,
              default='sc_separated',
              type=click.Choice(
                  observation_metadata_formatters),
              help='Method for formatting the observation metadata.')
def convert(input_fp, output_fp, sample_metadata_fp, observation_metadata_fp,
            to_json, to_hdf5, to_tsv, collapsed_samples,
            collapsed_observations, header_key, output_metadata_id, table_type,
            process_obs_metadata, tsv_metadata_formatter):
    """Convert to/from the BIOM table format.

    Convert between BIOM table formats. See examples here:
    http://biom-format.org/documentation/biom_conversion.html

    Example usage:

    Convert a "classic" BIOM file (tab-separated text) to an HDF5 BIOM
    formatted OTU table:

    $ biom convert -i table.txt -o table.biom --to-hdf5
    """
    # Fail fast before touching the filesystem if the requested output
    # formats conflict; _convert repeats this check for library callers.
    if sum([to_tsv, to_hdf5, to_json]) > 1:
        raise ValueError("--to-tsv, --to-json, and --to-hdf5 are mutually "
                         "exclusive. You can only pass one of these options.")

    table = load_table(input_fp)
    # NOTE(review): the 'U' open mode is deprecated on Python 3 -- consider
    # open(..., newline=None) instead.
    if sample_metadata_fp is not None:
        with open(sample_metadata_fp, 'U') as f:
            sample_metadata_f = MetadataMap.from_file(f)
    else:
        sample_metadata_f = None
    if observation_metadata_fp is not None:
        with open(observation_metadata_fp, 'U') as f:
            observation_metadata_f = MetadataMap.from_file(f)
    else:
        observation_metadata_f = None

    _convert(table, output_fp, sample_metadata_f, observation_metadata_f,
             to_json, to_hdf5, to_tsv, collapsed_samples,
             collapsed_observations, header_key, output_metadata_id,
             table_type, process_obs_metadata, tsv_metadata_formatter)
+
+
+def _convert(table, output_filepath, sample_metadata=None,
+ observation_metadata=None, to_json=False, to_hdf5=False,
+ to_tsv=False, collapsed_samples=False,
+ collapsed_observations=False, header_key=None,
+ output_metadata_id=None, table_type=None,
+ process_obs_metadata=None, tsv_metadata_formatter='sc_separated'):
+
+ if sum([to_tsv, to_hdf5, to_json]) == 0:
+ raise ValueError("Must specify an output format")
+ elif sum([to_tsv, to_hdf5, to_json]) > 1:
+ raise ValueError("Can only specify a single output format")
+
+ if table_type is None:
+ if table.type in [None, "None"]:
+ table.type = "Table"
+ else:
+ pass
+ else:
+ table.type = table_type
+
+ if tsv_metadata_formatter is not None:
+ obs_md_fmt_f = observation_metadata_formatters[tsv_metadata_formatter]
+
+ if sample_metadata is not None:
+ table.add_metadata(sample_metadata)
+
+ # if the user does not specify a name for the output metadata column,
+ # set it to the same as the header key
+ output_metadata_id = output_metadata_id or header_key
+
+ if process_obs_metadata is not None and not to_tsv:
+ if table.metadata(axis='observation') is None:
+ raise ValueError("Observation metadata processing requested "
+ "but it doesn't appear that there is any "
+ "metadata to operate on!")
+
+ # and if this came in as TSV, then we expect only a single type of
+ # metadata
+ md_key = list(table.metadata(axis='observation')[0].keys())[0]
+
+ process_f = observation_metadata_types[process_obs_metadata]
+ it = zip(table.ids(axis='observation'),
+ table.metadata(axis='observation'))
+ new_md = {id_: {md_key: process_f(md[md_key])} for id_, md in it}
+
+ if observation_metadata:
+ for k, v in observation_metadata.items():
+ new_md[k].update(v)
+ table.add_metadata(new_md, 'observation')
+
+ if to_tsv:
+ result = table.to_tsv(header_key=header_key,
+ header_value=output_metadata_id,
+ metadata_formatter=obs_md_fmt_f)
+ with open(output_filepath, 'w') as f:
+ f.write(result)
+ return
+ elif to_json:
+ fmt = 'json'
+ result = table
+ elif to_hdf5:
+ fmt = 'hdf5'
+ result = table
+ if collapsed_observations:
+ metadata = [{'collapsed_ids': sorted(md.keys())}
+ for md in result.metadata(axis='observation')]
+ result._observation_metadata = metadata
+ if collapsed_samples:
+ metadata = [{'collapsed_ids': sorted(md.keys())}
+ for md in result.metadata()]
+ result._sample_metadata = metadata
+ if collapsed_observations or collapsed_samples:
+ # We have changed the metadata, it is safer to make sure that
+ # it is correct
+ result._cast_metadata()
+ write_biom_table(result, fmt, output_filepath)
+
+ return
diff --git a/biom/cli/table_head.py b/biom/cli/table_head.py
new file mode 100644
index 0000000..9d924c7
--- /dev/null
+++ b/biom/cli/table_head.py
@@ -0,0 +1,46 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from __future__ import division
+
+import click
+
+from biom import load_table
+from biom.cli import cli
+
+
@cli.command()
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM table')
@click.option('-o', '--output-fp', default=None,
              type=click.Path(writable=True),
              help='An output file-path', required=False)
@click.option('-n', '--n-obs', default=5, type=int,
              help="The number of observations to show",
              required=False)
@click.option('-m', '--n-samp', default=5, type=int,
              help="The number of samples to show",
              required=False)
def head(input_fp, output_fp, n_obs, n_samp):
    """Dump the first bit of a table.

    Example usage:

    Print out the upper left corner of a BIOM table to standard out:

    $ biom head -i table.biom

    """
    # Table.head slices the upper-left n_obs x n_samp corner of the table.
    table = load_table(input_fp).head(n=n_obs, m=n_samp)

    # Echo to stdout when no output path is given.
    if output_fp is None:
        click.echo(str(table))
    else:
        with open(output_fp, 'w') as fp:
            fp.write(str(table))
diff --git a/biom/cli/table_normalizer.py b/biom/cli/table_normalizer.py
new file mode 100755
index 0000000..cad6ebf
--- /dev/null
+++ b/biom/cli/table_normalizer.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+# ----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from __future__ import division
+
+import click
+
+from biom import load_table
+from biom.cli import cli
+from biom.cli.util import write_biom_table
+from biom.util import HAVE_H5PY
+
+
@cli.command(name='normalize-table')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM table')
@click.option('-o', '--output-fp', default=None,
              type=click.Path(writable=True),
              help='An output file-path')
@click.option('-r', '--relative-abund', default=False, is_flag=True,
              help='convert table to relative abundance',
              required=False)
@click.option('-p', '--presence-absence', default=False, is_flag=True,
              help='convert table to presence/absence',
              required=False)
@click.option('-a', '--axis', default='sample',
              type=click.Choice(['sample', 'observation']),
              help='The axis to normalize over')
def normalize_table(input_fp, output_fp, relative_abund, presence_absence,
                    axis):
    """Normalize a BIOM table.

    Normalize the values of a BIOM table through various methods. Relative
    abundance will take the relative abundance of each observation in terms of
    samples or observations. Presence absence will convert observations to
    1's and 0's based on presence of the observation.

    Example usage:

    Normalizing a BIOM table to relative abundance:

    $ biom normalize-table -i table.biom -r -o normalized_table.biom

    Converting a BIOM table to a presence/absence table:

    $ biom normalize-table -i table.biom -p -o converted_table.biom
    """
    table = load_table(input_fp)
    result = _normalize_table(table, relative_abund, presence_absence, axis)

    # Prefer HDF5 output when h5py is available; otherwise fall back to JSON.
    write_biom_table(result, 'hdf5' if HAVE_H5PY else 'json', output_fp)
+
+
+def _normalize_table(table, relative_abund=False, presence_absence=False,
+ axis='sample'):
+ if relative_abund is False and presence_absence is False:
+ raise ValueError("Must specifiy a normalization type")
+ elif relative_abund is True and presence_absence is True:
+ raise ValueError("Must specify only one normalization type")
+
+ if relative_abund:
+ table.norm(axis=axis)
+ else:
+ table.pa()
+
+ return table
diff --git a/biom/cli/table_subsetter.py b/biom/cli/table_subsetter.py
new file mode 100644
index 0000000..fa056f2
--- /dev/null
+++ b/biom/cli/table_subsetter.py
@@ -0,0 +1,139 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from __future__ import division
+
+import click
+
+from biom.cli import cli
+from biom.parse import (get_axis_indices, direct_slice_data, direct_parse_key,
+ generatedby)
+from biom.table import Table
+from biom.util import biom_open, HAVE_H5PY
+
+
@cli.command(name='subset-table')
@click.option('-i', '--input-hdf5-fp', default=None,
              type=click.Path(exists=True, dir_okay=False),
              help='the input hdf5 BIOM table filepath to subset')
@click.option('-j', '--input-json-fp', default=None,
              type=click.Path(exists=True, dir_okay=False),
              help='the input json BIOM table filepath to subset')
@click.option('-a', '--axis', required=True,
              type=click.Choice(['sample', 'observation']),
              help='the axis to subset over, either sample or observation')
@click.option('-s', '--ids', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='a file containing a single column of IDs to retain '
                   '(either sample IDs or observation IDs, depending on the '
                   'axis)')
@click.option('-o', '--output-fp', required=True,
              type=click.Path(writable=True, dir_okay=False),
              help='the output BIOM table filepath')
def subset_table(input_hdf5_fp, input_json_fp, axis, ids, output_fp):
    """Subset a BIOM table.

    Subset a BIOM table, over either observations or samples, without fully
    parsing it. This command is intended to assist in working with very large
    tables when tight on memory, or as a lightweight way to subset a full
    table. Currently, it is possible to produce tables with rows or columns
    (observations or samples) that are fully zeroed.

    Example usage:

    Choose a subset of the observations in table.biom (JSON) and write them to
    subset.biom:

    $ biom subset-table -j table.biom -a observation -s observation_ids.txt \
-o subset.biom

    Choose a subset of the observations in table.biom (HDF5) and write them to
    subset.biom:

    $ biom subset-table -i table.biom -a observation -s observation_ids.txt \
-o subset.biom

    """
    # Read the whole JSON document into memory as a string; subsetting then
    # works on the raw text without building a Table object.
    if input_json_fp is not None:
        with open(input_json_fp, 'U') as f:
            input_json_fp = f.read()

    # One ID per line.
    with open(ids, 'U') as f:
        ids = [line.strip() for line in f]

    table, format_ = _subset_table(input_hdf5_fp, input_json_fp, axis, ids)

    if format_ == 'json':
        # For JSON input, `table` is a generator of JSON fragments; stream
        # them to disk.
        with open(output_fp, 'w') as f:
            for line in table:
                f.write(line)
            f.write('\n')
    else:
        if HAVE_H5PY:
            import h5py
        else:
            # This should never be raised here
            raise ImportError("h5py is not available, cannot write HDF5!")

        with h5py.File(output_fp, 'w') as f:
            table.to_hdf5(f, generatedby())
+
+
def _subset_table(hdf5_biom, json_table_str, axis, ids):
    """Subset a table over `axis`, keeping only `ids`.

    Exactly one of `hdf5_biom` (a filepath) or `json_table_str` (the full
    JSON document as a string) must be provided.

    Returns
    -------
    (table, format_)
        For JSON input, `table` is a generator yielding JSON fragments that
        the caller streams to disk and `format_` is 'json'. For HDF5 input,
        `table` is a Table and `format_` is 'hdf5'.

    Raises
    ------
    ValueError
        If `axis` is invalid, or if neither/both inputs are given.
    """
    if axis not in ['sample', 'observation']:
        raise ValueError("Invalid axis '%s'. Must be either 'sample' or "
                         "'observation'." % axis)

    if hdf5_biom is None and json_table_str is None:
        raise ValueError("Must specify an input table")
    elif hdf5_biom is not None and json_table_str is not None:
        raise ValueError("Can only specify one input table")

    if json_table_str is not None:
        idxs, new_axis_md = get_axis_indices(json_table_str, ids, axis)
        new_data = direct_slice_data(json_table_str, idxs, axis)

        # multiple walks over the string. bad form, but easy right now
        # ...should add a yield_and_ignore parser or something.
        def subset_generator():
            # Re-emit the document key by key, substituting the sliced
            # data and axis metadata, without parsing the whole JSON.
            yield "{"
            yield direct_parse_key(json_table_str, "id")
            yield ","
            yield direct_parse_key(json_table_str, "format")
            yield ","
            yield direct_parse_key(json_table_str, "format_url")
            yield ","
            yield direct_parse_key(json_table_str, "type")
            yield ","
            yield direct_parse_key(json_table_str, "generated_by")
            yield ","
            yield direct_parse_key(json_table_str, "date")
            yield ","
            yield direct_parse_key(json_table_str, "matrix_type")
            yield ","
            yield direct_parse_key(json_table_str, "matrix_element_type")
            yield ","
            yield new_data
            yield ","
            yield new_axis_md
            yield ","

            # The non-subset axis is copied through verbatim.
            if axis == "observation":
                yield direct_parse_key(json_table_str, "columns")
            else:
                yield direct_parse_key(json_table_str, "rows")
            yield "}"

        format_ = 'json'
        table = subset_generator()
    else:
        with biom_open(hdf5_biom) as f:
            table = Table.from_hdf5(f, ids=ids, axis=axis)
        format_ = 'hdf5'

    return table, format_
diff --git a/biom/cli/table_summarizer.py b/biom/cli/table_summarizer.py
new file mode 100644
index 0000000..77a0778
--- /dev/null
+++ b/biom/cli/table_summarizer.py
@@ -0,0 +1,136 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from __future__ import division
+
+from operator import itemgetter
+
+import click
+from numpy import std
+
+from biom import load_table
+from biom.cli import cli
+from biom.util import compute_counts_per_sample_stats
+
+
@cli.command(name='summarize-table')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM table')
@click.option('-o', '--output-fp', default=None,
              type=click.Path(writable=True, dir_okay=False),
              help='An output file-path')
@click.option('--qualitative', default=False, is_flag=True,
              help="Present counts as number of unique observation ids per"
                   " sample, rather than counts of observations per sample.")
@click.option('--observations', default=False, is_flag=True,
              help="Summarize over observations")
def summarize_table(input_fp, output_fp, qualitative, observations):
    """Summarize sample or observation data in a BIOM table.

    Provides details on the observation counts per sample, including summary
    statistics, as well as metadata categories associated with samples and
    observations.

    Example usage:

    Write a summary of table.biom to table_summary.txt:

    $ biom summarize-table -i table.biom -o table_summary.txt

    """
    table = load_table(input_fp)
    result = _summarize_table(table, qualitative, observations)
    # Write to the requested file, or echo to stdout when no path is given.
    if output_fp:
        with open(output_fp, 'w') as fh:
            fh.write(result)
    else:
        click.echo(result)
+
+
def _summarize_table(table, qualitative=False, observations=False):
    """Build a human-readable summary of `table` and return it as a string.

    Parameters
    ----------
    table : biom.Table
        Table to summarize.
    qualitative : bool
        If True, report the number of unique observation ids per sample
        instead of total counts per sample.
    observations : bool
        If True, summarize over observations instead of samples.
    """
    lines = []

    if observations:
        # Transposing lets the per-sample machinery below serve both cases;
        # the labels are swapped back when reporting.
        table = table.transpose()

    min_counts, max_counts, median_counts, mean_counts, counts_per_samp =\
        compute_counts_per_sample_stats(table, qualitative)
    num_observations = len(table.ids(axis='observation'))

    counts_per_sample_values = list(counts_per_samp.values())

    # Metadata keys are taken from the first entry; presumably all entries
    # share the same keys -- TODO confirm.
    if table.metadata() is None:
        sample_md_keys = ["None provided"]
    else:
        sample_md_keys = table.metadata()[0].keys()

    if table.metadata(axis='observation') is None:
        observation_md_keys = ["None provided"]
    else:
        observation_md_keys = table.metadata(axis='observation')[0].keys()

    num_samples = len(table.ids())

    if observations:
        # as this is a transpose of the original table...
        lines.append('Num samples: %d' % num_observations)
        lines.append('Num observations: %d' % num_samples)
    else:
        lines.append('Num samples: %d' % num_samples)
        lines.append('Num observations: %d' % num_observations)

    # Totals and density only make sense for quantitative summaries.
    if not qualitative:
        total_count = sum(counts_per_sample_values)
        lines.append('Total count: %d' % total_count)
        lines.append('Table density (fraction of non-zero values): %1.3f' %
                     table.get_table_density())

    lines.append('')

    if qualitative:
        if observations:
            lines.append('Sample/observations summary:')
        else:
            lines.append('Observations/sample summary:')
    else:
        lines.append('Counts/sample summary:')

    lines.append(' Min: %r' % min_counts)
    lines.append(' Max: %r' % max_counts)
    lines.append(' Median: %1.3f' % median_counts)
    lines.append(' Mean: %1.3f' % mean_counts)
    lines.append(' Std. dev.: %1.3f' % std(counts_per_sample_values))

    if observations:
        # since this is a transpose...
        lines.append(
            ' Sample Metadata Categories: %s' %
            '; '.join(observation_md_keys))
        lines.append(
            ' Observation Metadata Categories: %s' %
            '; '.join(sample_md_keys))
        lines.append('')
    else:
        lines.append(
            ' Sample Metadata Categories: %s' %
            '; '.join(sample_md_keys))
        lines.append(
            ' Observation Metadata Categories: %s' %
            '; '.join(observation_md_keys))
        lines.append('')

    if qualitative:
        lines.append('Observations/sample detail:')
    else:
        lines.append('Counts/sample detail:')

    # Per-sample detail, sorted ascending by count.
    for k, v in sorted(counts_per_samp.items(), key=itemgetter(1)):
        lines.append('%s: %r' % (k, v))

    return "\n".join(lines)
diff --git a/biom/commands/table_validator.py b/biom/cli/table_validator.py
similarity index 88%
rename from biom/commands/table_validator.py
rename to biom/cli/table_validator.py
index f9ed531..ee90005 100644
--- a/biom/commands/table_validator.py
+++ b/biom/cli/table_validator.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-
+# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# Copyright (c) 2011-2013, The BIOM Format Development Team.
#
@@ -15,64 +15,67 @@ from datetime import datetime
from operator import and_
from functools import reduce
+import click
import numpy as np
-from pyqi.core.command import (Command, CommandIn, CommandOut,
- ParameterCollection)
-
-from biom.util import HAVE_H5PY, biom_open
-
-
-__author__ = "Daniel McDonald"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Daniel McDonald", "Jose Clemente", "Greg Caporaso",
- "Jai Ram Rideout", "Justin Kuczynski", "Andreas Wilke",
- "Tobias Paczian", "Rob Knight", "Folker Meyer", "Sue Huse"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__author__ = "Daniel McDonald"
-__email__ = "daniel.mcdonald at colorado.edu"
-
-
-class TableValidator(Command):
- BriefDescription = "Validate a BIOM-formatted file"
- LongDescription = ("Test a file for adherence to the Biological "
- "Observation Matrix (BIOM) format specification. This "
- "specification is defined at http://biom-format.org")
-
- CommandIns = ParameterCollection([
- CommandIn(Name='table', DataType=object,
- Description='the input BIOM JSON object (e.g., the output '
- 'of json.load)', Required=True),
- CommandIn(Name='is_json', DataType=bool,
- Description='the input type',
- Required=False, Default=False),
- CommandIn(Name='format_version', DataType=str,
- Description='the specific format version to validate '
- 'against', Required=False, Default=None),
- CommandIn(Name='detailed_report', DataType=bool,
- Description='include more details in the output report',
- Required=False, Default=False)
- ])
-
- CommandOuts = ParameterCollection([
- CommandOut(Name='valid_table',
- Description='Is the table valid?',
- DataType=bool),
- CommandOut(Name='report_lines',
- Description='Detailed report',
- DataType=list)
- ])
+
+from biom.cli import cli
+from biom.util import HAVE_H5PY, biom_open, is_hdf5_file
+
+
@cli.command(name='validate-table')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input filepath to validate against the BIOM format'
                   ' specification')
@click.option('-f', '--format-version', default=None,
              help='The specific format version to validate against')
@click.option('--detailed-report', is_flag=True, default=False,
              help='Include more details in the output report')
def validate_table(input_fp, format_version, detailed_report):
    """Validate a BIOM-formatted file.

    Test a file for adherence to the Biological Observation Matrix (BIOM)
    format specification. This specification is defined at
    http://biom-format.org

    Example usage:

    Validate the contents of table.biom for adherence to the BIOM format
    specification

    $ biom validate-table -i table.biom

    """
    # Fixed typo in the original help text ("filpath" -> "filepath").
    valid, report = _validate_table(input_fp, format_version, detailed_report)
    click.echo("\n".join(report))
    # NOTE(review): relies on a module-level `import sys` -- confirm it is
    # present at the top of this file.
    if valid:
        # apparently silence is too quiet to be golden.
        click.echo("The input file is a valid BIOM-formatted file.")
        sys.exit(0)
    else:
        click.echo("The input file is not a valid BIOM-formatted file.")
        sys.exit(1)
+
+
def _validate_table(input_fp, format_version=None, detailed_report=False):
    """Run TableValidator on `input_fp`.

    Returns a (valid, report_lines) tuple: whether the file is a valid
    BIOM table, and the detailed report lines.
    """
    validator = TableValidator()
    outcome = validator(table=input_fp, format_version=format_version,
                        detailed_report=detailed_report)
    return outcome['valid_table'], outcome['report_lines']
+
+
+# Refactor in the future. Also need to address #664
+class TableValidator(object):
FormatURL = "http://biom-format.org"
TableTypes = set(['otu table', 'pathway table', 'function table',
'ortholog table', 'gene table', 'metabolite table',
'taxon table'])
MatrixTypes = set(['sparse', 'dense'])
- ElementTypes = {'int': int, 'str': str, 'float': float, 'unicode': unicode}
+ ElementTypes = {'int': int, 'str': str, 'float': float, 'unicode': str}
HDF5FormatVersions = set([(2, 0), (2, 0, 0), (2, 1), (2, 1, 0)])
def run(self, **kwargs):
- is_json = kwargs['is_json']
+ is_json = not is_hdf5_file(kwargs['table'])
if kwargs['format_version'] in [None, 'None']:
if is_json:
@@ -88,8 +91,6 @@ class TableValidator(Command):
raise ValueError("Unrecognized format version: %s" %
kwargs['format_version'])
- # this is not pyqi-appriopriate, but how we parse this thing is
- # dependent on runtime options :(
with biom_open(kwargs['table']) as f:
if is_json:
kwargs['table'] = json.load(f)
@@ -107,6 +108,10 @@ class TableValidator(Command):
raise IOError("h5py is not installed, can only validate JSON "
"tables")
+ def __call__(self, table, format_version=None, detailed_report=False):
+ return self.run(table=table, format_version=format_version,
+ detailed_report=detailed_report)
+
def _validate_hdf5(self, **kwargs):
table = kwargs['table']
@@ -324,7 +329,10 @@ class TableValidator(Command):
def _json_or_hdf5_get(self, table, key):
if hasattr(table, 'attrs'):
- return table.attrs.get(key, None)
+ item = table.attrs.get(key, None)
+ if item is not None and isinstance(item, bytes):
+ item = item.decode('utf8')
+ return item
else:
return table.get(key, None)
@@ -336,11 +344,11 @@ class TableValidator(Command):
def _is_int(self, x):
"""Return True if x is an int"""
- return isinstance(x, int)
+ return isinstance(x, (int, np.int64))
def _valid_nnz(self, table):
"""Check if nnz seems correct"""
- if not isinstance(table.attrs['nnz'], int):
+ if not self._is_int(table.attrs['nnz']):
return "nnz is not an integer!"
if table.attrs['nnz'] < 0:
return "nnz is negative!"
@@ -384,6 +392,9 @@ class TableValidator(Command):
"%Y-%m-%dT%H:%M",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%S.%f"]
+ if isinstance(val, bytes):
+ val = val.decode('utf8')
+
valid_time = False
for fmt in valid_times:
try:
@@ -562,5 +573,3 @@ class TableValidator(Command):
return self._valid_dense_data(table_json)
else:
return "Unknown matrix type"
-
-CommandConstructor = TableValidator
diff --git a/biom/cli/uc_processor.py b/biom/cli/uc_processor.py
new file mode 100644
index 0000000..9534d73
--- /dev/null
+++ b/biom/cli/uc_processor.py
@@ -0,0 +1,85 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from __future__ import division
+
+import click
+
+from biom.cli import cli
+from biom.cli.util import write_biom_table
+from biom.parse import parse_uc
+from biom.exception import TableException
+
+
@cli.command('from-uc')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input uc filepath.')
@click.option('-o', '--output-fp', default=None,
              type=click.Path(writable=True),
              help='The output BIOM filepath', required=False)
@click.option('--rep-set-fp', type=click.Path(exists=True, dir_okay=False),
              help="Fasta file containing representative sequences with "
                   "where sequences are labeled with OTU identifiers, and "
                   "description fields contain original sequence identifiers. "
                   "This output is created, for example, by vsearch with the "
                   "--relabel_sha1 --relabel_keep options.",
              required=False)
def from_uc(input_fp, output_fp, rep_set_fp):
    """Create a BIOM table from a vsearch/uclust/usearch uc file.

    Example usage:

    Simple BIOM creation:

    $ biom from-uc -i in.uc -o out.biom

    BIOM creation with OTU re-naming:

    $ biom from-uc -i in.uc -o out.biom --rep-set-fp rep-set.fna

    """
    # NOTE(review): the 'U' open mode is deprecated on Python 3; also note
    # these handles are never explicitly closed -- consider `with` blocks.
    input_f = open(input_fp, 'U')
    if rep_set_fp is not None:
        rep_set_f = open(rep_set_fp, 'U')
    else:
        rep_set_f = None
    table = _from_uc(input_f, rep_set_f)
    write_biom_table(table, 'hdf5', output_fp)
+
+
+def _id_map_from_fasta(fasta_lines):
+ result = {}
+ for line in fasta_lines:
+ if line.startswith('>'):
+ try:
+ obs_id, seq_id = line.split()[:2]
+ except ValueError:
+ raise ValueError('Sequence identifiers in fasta file '
+ 'must contain at least two space-'
+ 'separated fields.')
+ result[seq_id] = obs_id[1:]
+ else:
+ pass
+ return result
+
+
def _from_uc(input_f, rep_set_f=None):
    """Parse an open uc file into a Table, optionally renaming observations.

    Parameters
    ----------
    input_f : file-like
        Open uc file, parsed with ``parse_uc``.
    rep_set_f : iterable of str or None
        Open representative-set fasta file; when provided, observation ids
        are renamed using the mapping extracted from its header lines.

    Raises
    ------
    ValueError
        If the fasta file does not cover all observation ids in the table.
    """
    table = parse_uc(input_f)

    if rep_set_f is not None:
        obs_id_map = _id_map_from_fasta(rep_set_f)
        try:
            # strict=True makes missing ids raise rather than pass silently.
            table.update_ids(obs_id_map, axis='observation', strict=True,
                             inplace=True)
        except TableException:
            raise ValueError('Not all sequence identifiers in the input BIOM '
                             'file are present in description fields in the '
                             'representative sequence fasta file.')

    return table
diff --git a/biom/cli/util.py b/biom/cli/util.py
new file mode 100644
index 0000000..8b5c972
--- /dev/null
+++ b/biom/cli/util.py
@@ -0,0 +1,35 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from __future__ import division
+
+import biom.util
+import biom.parse
+
+
def write_biom_table(table, fmt, filepath):
    """Write `table` to `filepath` in the specified format.

    `fmt` must be one of 'hdf5', 'json', or 'tsv'; 'hdf5' silently falls
    back to 'json' when h5py is not installed.
    """
    if fmt not in ('hdf5', 'json', 'tsv'):
        raise ValueError("Unknown file format")

    # Degrade gracefully when HDF5 support is unavailable.
    if fmt == 'hdf5' and not biom.util.HAVE_H5PY:
        fmt = 'json'

    if fmt == 'hdf5':
        import h5py

        with h5py.File(filepath, 'w') as out:
            table.to_hdf5(out, biom.parse.generatedby())
    elif fmt == 'json':
        with open(filepath, 'w') as out:
            out.write(table.to_json(biom.parse.generatedby()))
    else:  # tsv: `table` is expected to already be the serialized string
        with open(filepath, 'w') as out:
            out.write(table)
            out.write('\n')
diff --git a/biom/commands/installation_informer.py b/biom/commands/installation_informer.py
deleted file mode 100644
index 11b126f..0000000
--- a/biom/commands/installation_informer.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from __future__ import division
-from sys import platform, version as python_version, executable
-from pyqi.core.command import Command, CommandOut, ParameterCollection
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Daniel McDonald", "Jose Clemente", "Greg Caporaso",
- "Jai Ram Rideout", "Justin Kuczynski", "Andreas Wilke",
- "Tobias Paczian", "Rob Knight", "Folker Meyer", "Sue Huse"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-
-class InstallationInformer(Command):
- BriefDescription = ("Provide information about the biom-format "
- "installation")
- LongDescription = ("Provide information about the biom-format "
- "installation, including settings pulled from the "
- "configuration file. For more details, see "
- "http://biom-format.org")
- CommandIns = ParameterCollection([])
- CommandOuts = ParameterCollection([
- CommandOut(Name='install_info_lines',
- DataType='str',
- Description='Installation info')
- ])
-
- def run(self, **kwargs):
- lines = []
-
- lines.extend(self.get_formatted_system_info())
- lines.extend(self.get_formatted_dependency_version_info())
- lines.extend(self.get_formatted_package_info())
- lines.append('')
-
- return {'install_info_lines': lines}
-
- def get_formatted_system_info(self):
- return self._format_info(self.get_system_info(), 'System information')
-
- def get_formatted_dependency_version_info(self):
- return self._format_info(self.get_dependency_version_info(),
- 'Dependency versions')
-
- def get_formatted_package_info(self):
- return self._format_info(self.get_package_info(),
- 'biom-format package information')
-
- def get_system_info(self):
- return (("Platform", platform),
- ("Python/GCC version", python_version.replace('\n', ' ')),
- ("Python executable", executable))
-
- def get_dependency_version_info(self):
- not_installed_msg = "Not installed"
-
- try:
- from pyqi import __version__ as pyqi_lib_version
- except ImportError:
- pyqi_lib_version = not_installed_msg
-
- try:
- from numpy import __version__ as numpy_lib_version
- except ImportError:
- numpy_lib_version = ("ERROR: Not installed - this is required! "
- "(This will also cause the BIOM library to "
- "not be importable.)")
-
- try:
- from scipy import __version__ as scipy_lib_version
- except ImportError:
- scipy_lib_version = not_installed_msg
-
- try:
- from h5py import __version__ as h5py_lib_version
- except ImportError:
- h5py_lib_version = ("WARNING: Not installed - this is an optional "
- "dependency. It is strongly recommended for "
- "large datasets.")
-
- return (("pyqi version", pyqi_lib_version),
- ("NumPy version", numpy_lib_version),
- ("SciPy version", scipy_lib_version),
- ("h5py version", h5py_lib_version))
-
- def get_package_info(self):
- import_error_msg = ("ERROR: Can't find the BIOM library code (or "
- "numpy) - is it installed and in your "
- "$PYTHONPATH?")
- try:
- from biom import __version__ as biom_lib_version
- except ImportError:
- biom_lib_version = import_error_msg
-
- return (("biom-format version", biom_lib_version),)
-
- def _format_info(self, info, title):
- max_len = self._get_max_length(info)
-
- lines = ['']
- lines.append(title)
- lines.append('=' * len(title))
- for e in info:
- lines.append("%*s:\t%s" % (max_len, e[0], e[1]))
-
- return lines
-
- def _get_max_length(self, info):
- return max([len(e[0]) for e in info])
-
-CommandConstructor = InstallationInformer
diff --git a/biom/commands/metadata_adder.py b/biom/commands/metadata_adder.py
deleted file mode 100644
index cdab17f..0000000
--- a/biom/commands/metadata_adder.py
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from __future__ import division
-from pyqi.core.command import (Command, CommandIn, CommandOut,
- ParameterCollection)
-from pyqi.core.exception import CommandError
-from biom.parse import MetadataMap
-from biom.table import Table
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Greg Caporaso", "Morgan Langille", "Jai Ram Rideout",
- "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-
-class MetadataAdder(Command):
- BriefDescription = "Add metadata to a BIOM table"
- LongDescription = ("Add sample and/or observation metadata to "
- "BIOM-formatted files. Detailed usage examples can be "
- "found here: http://biom-format.org/documentation/add"
- "ing_metadata.html")
-
- CommandIns = ParameterCollection([
- CommandIn(Name='table', DataType=Table,
- Description='the input BIOM table', Required=True),
- # sample_metadata and observation_metadata are currently files (or
- # file-like) because of the existing metadata map / processing function
- # support. Ideally, these two parameters should be MetadataMap
- # instances.
- CommandIn(Name='sample_metadata', DataType=file,
- Description='the sample metadata map (will add sample '
- 'metadata to the input BIOM table, if provided)'),
- CommandIn(Name='observation_metadata', DataType=file,
- Description='the observation metadata map (will add '
- 'observation metadata to the input BIOM table, if '
- 'provided)'),
- CommandIn(Name='sc_separated', DataType=list,
- Description='list of the metadata fields to split on '
- 'semicolons. This is useful for hierarchical data such as '
- 'taxonomy or functional categories'),
- CommandIn(Name='sc_pipe_separated', DataType=list,
- Description='list of the metadata fields to split on '
- 'semicolons and pipes ("|"). This is useful for '
- 'hierarchical data such as functional categories with '
- 'one-to-many mappings (e.g. x;y;z|x;y;w)'),
- CommandIn(Name='int_fields', DataType=list,
- Description='list of the metadata fields to cast to '
- 'integers. This is useful for integer data such as '
- '"DaysSinceStart"'),
- CommandIn(Name='float_fields', DataType=list,
- Description='list of the metadata fields to cast to '
- 'floating point numbers. This is useful for real number '
- 'data such as "pH"'),
- CommandIn(Name='sample_header', DataType=list,
- Description='list of the sample metadata field names. This '
- 'is useful if a header line is not provided with the '
- 'metadata, if you want to rename the fields, or if you want '
- 'to include only the first n fields where n is the number '
- 'of entries provided here',
- DefaultDescription='use header from sample metadata map'),
- CommandIn(Name='observation_header', DataType=list,
- Description='list of the observation metadata field names. '
- 'This is useful if a header line is not provided with the '
- 'metadata, if you want to rename the fields, or if you want '
- 'to include only the first n fields where n is the number '
- 'of entries provided here',
- DefaultDescription='use header from observation metadata '
- 'map'),
- CommandIn(Name='output_as_json', DataType=bool,
- Description='Output as JSON', Default=False)
- ])
-
- CommandOuts = ParameterCollection([
- CommandOut(Name='table', DataType=tuple,
- Description='Table with added metadata, and the output '
- 'format')
- ])
-
- def run(self, **kwargs):
- table = kwargs['table']
- sample_metadata = kwargs['sample_metadata']
- observation_metadata = kwargs['observation_metadata']
- sc_separated = kwargs['sc_separated']
- sc_pipe_separated = kwargs['sc_pipe_separated']
- int_fields = kwargs['int_fields']
- float_fields = kwargs['float_fields']
- sample_header = kwargs['sample_header']
- observation_header = kwargs['observation_header']
- output_as = 'json' if kwargs['output_as_json'] else 'hdf5'
-
- # define metadata processing functions, if any
- process_fns = {}
- if sc_separated is not None:
- process_fns.update(dict.fromkeys(sc_separated,
- self._split_on_semicolons))
-
- if sc_pipe_separated is not None:
- process_fns.update(dict.fromkeys(sc_pipe_separated,
- self._split_on_semicolons_and_pipes))
-
- if int_fields is not None:
- process_fns.update(dict.fromkeys(int_fields, self._int))
-
- if float_fields is not None:
- process_fns.update(dict.fromkeys(float_fields, self._float))
-
- # parse mapping files
- if sample_metadata is not None:
- sample_metadata = MetadataMap.from_file(sample_metadata,
- process_fns=process_fns,
- header=sample_header)
-
- if observation_metadata is not None:
- observation_metadata = MetadataMap.from_file(
- observation_metadata,
- process_fns=process_fns,
- header=observation_header)
-
- if sample_metadata is None and observation_metadata is None:
- raise CommandError('Must specify sample_metadata and/or '
- 'observation_metadata.')
-
- # NAUGHTY: this is modifying the input table IN PLACE!!! And then
- # RETURNING IT! MetadataAdder is angry!
-
- # add metadata as necessary
- if sample_metadata:
- table.add_metadata(sample_metadata, axis='sample')
-
- if observation_metadata:
- table.add_metadata(observation_metadata, axis='observation')
-
- return {'table': (table, output_as)}
-
- def _split_on_semicolons(self, x):
- return [e.strip() for e in x.split(';')]
-
- def _split_on_semicolons_and_pipes(self, x):
- return [[e.strip() for e in y.split(';')] for y in x.split('|')]
-
- def _int(self, x):
- try:
- return int(x)
- except ValueError:
- return x
-
- def _float(self, x):
- try:
- return float(x)
- except ValueError:
- return x
-
-CommandConstructor = MetadataAdder
diff --git a/biom/commands/table_converter.py b/biom/commands/table_converter.py
deleted file mode 100644
index 3399124..0000000
--- a/biom/commands/table_converter.py
+++ /dev/null
@@ -1,221 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from __future__ import division
-from pyqi.core.command import (Command, CommandIn, CommandOut,
- ParameterCollection)
-from pyqi.core.exception import CommandError
-from biom.table import Table
-from biom.parse import MetadataMap
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Greg Caporaso", "Daniel McDonald",
- "Jose Carlos Clemente Litran", "Jai Ram Rideout",
- "Jose Antonio Navas Molina"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-
-class TableConverter(Command):
- TableTypes = ["OTU table",
- "Pathway table",
- "Function table",
- "Ortholog table",
- "Gene table",
- "Metabolite table",
- "Taxon table"]
-
- ObservationMetadataTypes = {
- 'sc_separated': lambda x: [e.strip() for e in x.split(';')],
- 'naive': lambda x: x
- }
-
- ObservationMetadataFormatters = {
- 'sc_separated': lambda x: '; '.join(x),
- 'naive': lambda x: x
- }
-
- ObservationMetadataTypes['taxonomy'] = \
- ObservationMetadataTypes['sc_separated']
-
- BriefDescription = "Convert to/from the BIOM table format"
- LongDescription = ("Convert between BIOM and 'classic' (tab-delimited) "
- "table formats. Detailed usage examples can be found "
- "here: http://biom-format.org/documentation/biom_conver"
- "sion.html")
-
- CommandIns = ParameterCollection([
- # This is not an ideal usage of the pyqi framework because we are
- # expecting a file-like object here, and a lot of the parameters deal
- # with I/O-ish things, like converting between file formats. Even
- # though no I/O is forced here, it would be better to have rich objects
- # as input and output, instead of lines of data. However, this will
- # likely require a refactoring/redesign of our interface for table
- # conversions because the primary input here can be either a BIOM table
- # or a classic table. One possible solution is to split out different
- # types of conversions into their own (smaller and simpler) commands,
- # which would allow us to avoid some of this I/O-ish stuff.
- CommandIn(Name='table', DataType=Table,
- Description='the input table (file-like object), either in '
- 'BIOM or classic format', Required=True),
- CommandIn(Name='to_json', DataType=bool,
- Description='Output as a JSON table', Default=False),
- CommandIn(Name='to_hdf5', DataType=bool,
- Description='Output as a HDF5 table', Default=False),
- CommandIn(Name='to_tsv', DataType=bool,
- Description='Output as a TSV table', Default=False),
- CommandIn(Name='collapsed_samples', DataType=bool,
- Description='If to_hdf5 and the original table is a '
- 'collapsed by samples biom table, this will '
- 'update the sample metadata of the table to '
- 'the supported HDF5 collapsed format'),
- CommandIn(Name='collapsed_observations', DataType=bool,
- Description='If to_hdf5 and the original table is a '
- 'collapsed by observations biom table, this will'
- ' update the observation metadata of the table '
- 'to the supported HDF5 collapsed format'),
- CommandIn(Name='sample_metadata', DataType=MetadataMap,
- Description='the sample metadata map (will add sample '
- 'metadata to the BIOM table, if provided). Only applies '
- 'when converting from classic table file to BIOM table '
- 'file'),
- CommandIn(Name='observation_metadata', DataType=MetadataMap,
- Description='the observation metadata map (will add '
- 'observation metadata to the BIOM table, if provided). Only '
- 'applies when converting from classic table file to BIOM '
- 'table file'),
- CommandIn(Name='header_key', DataType=str,
- Description='pull this key from observation metadata within '
- 'a BIOM table file when creating a classic table file',
- DefaultDescription='no observation metadata will be '
- 'included'),
- CommandIn(Name='output_metadata_id', DataType=str,
- Description='the name to be given to the observation '
- 'metadata column when creating a classic table from a BIOM-'
- 'formatted table', DefaultDescription='same name as in the '
- 'BIOM-formatted table'),
- CommandIn(Name='table_type', DataType=str,
- Description='the type of the table, must be one of: %s' %
- ', '.join(TableTypes), Required=False),
- CommandIn(Name='process_obs_metadata', DataType=str,
- Description='process metadata associated with observations '
- 'when converting from a classic table. Must be one of: %s' %
- ', '.join(ObservationMetadataTypes), Default=None),
- CommandIn(Name='tsv_metadata_formatter', DataType=str,
- Description='Method for formatting the observation '
- 'metadata, must be one of: %s' %
- ', '.join(ObservationMetadataFormatters),
- Default='sc_separated')
- ])
-
- CommandOuts = ParameterCollection([
- CommandOut(Name='table', DataType=tuple,
- Description='The resulting table and format')
- ])
-
- def run(self, **kwargs):
- table = kwargs['table']
- sample_metadata = kwargs['sample_metadata']
- observation_metadata = kwargs['observation_metadata']
- header_key = kwargs['header_key']
- output_metadata_id = kwargs['output_metadata_id']
- process_obs_metadata = kwargs['process_obs_metadata']
- obs_md_fmt = kwargs['tsv_metadata_formatter']
- table_type = kwargs['table_type']
- to_tsv = kwargs['to_tsv']
- to_hdf5 = kwargs['to_hdf5']
- to_json = kwargs['to_json']
- collapsed_observations = kwargs['collapsed_observations']
- collapsed_samples = kwargs['collapsed_samples']
-
- if sum([to_tsv, to_hdf5, to_json]) == 0:
- raise CommandError("Must specify an output format")
- elif sum([to_tsv, to_hdf5, to_json]) > 1:
- raise CommandError("Can only specify a single output format")
-
- # if we don't have a table type, then one is required to be specified
- if table.type in [None, "None"]:
- if table_type is None:
- raise CommandError("Must specify --table-type!")
- else:
- if table_type not in self.TableTypes:
- raise CommandError("Unknown table type: %s" % table_type)
-
- table.type = table_type
-
- if obs_md_fmt not in self.ObservationMetadataFormatters:
- raise CommandError("Unknown tsv_metadata_formatter: %s" %
- obs_md_fmt)
- else:
- obs_md_fmt_f = self.ObservationMetadataFormatters[obs_md_fmt]
-
- if sample_metadata is not None:
- table.add_metadata(sample_metadata)
-
- # if the user does not specify a name for the output metadata column,
- # set it to the same as the header key
- output_metadata_id = output_metadata_id or header_key
-
- if process_obs_metadata is not None and not to_tsv:
- if process_obs_metadata not in self.ObservationMetadataTypes:
- raise CommandError(
- "Unknown observation metadata processing method, must be "
- "one of: %s" %
- ', '.join(self.ObservationMetadataTypes.keys()))
-
- if table.metadata(axis='observation') is None:
- raise CommandError("Obseration metadata processing requested "
- "but it doesn't appear that there is any "
- "metadata to operate on!")
-
- # and if this came in as TSV, then we expect only a single type of
- # metadata
- md_key = table.metadata(axis='observation')[0].keys()[0]
-
- process_f = self.ObservationMetadataTypes[process_obs_metadata]
- it = zip(table.ids(axis='observation'),
- table.metadata(axis='observation'))
- new_md = {id_: {md_key: process_f(md[md_key])} for id_, md in it}
-
- if observation_metadata:
- for k, v in observation_metadata.items():
- new_md[k].update(v)
- table.add_metadata(new_md, 'observation')
-
- if to_tsv:
- result = table.to_tsv(header_key=header_key,
- header_value=output_metadata_id,
- metadata_formatter=obs_md_fmt_f)
- fmt = 'tsv'
- elif to_json:
- result = table
- fmt = 'json'
- elif to_hdf5:
- result = table
- if collapsed_observations:
- metadata = [{'collapsed_ids': md.keys()}
- for md in result.metadata(axis='observation')]
- result._observation_metadata = metadata
- if collapsed_samples:
- metadata = [{'collapsed_ids': md.keys()}
- for md in result.metadata()]
- result._sample_metadata = metadata
- if collapsed_observations or collapsed_samples:
- # We have changed the metadata, it is safer to make sure that
- # it is correct
- result._cast_metadata()
- fmt = 'hdf5'
-
- return {'table': (result, fmt)}
-
-CommandConstructor = TableConverter
diff --git a/biom/commands/table_subsetter.py b/biom/commands/table_subsetter.py
deleted file mode 100644
index 4ae26e9..0000000
--- a/biom/commands/table_subsetter.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from __future__ import division
-from pyqi.core.command import (Command, CommandIn, CommandOut,
- ParameterCollection)
-from pyqi.core.exception import CommandError
-from biom.parse import get_axis_indices, direct_slice_data, direct_parse_key
-from biom.table import Table
-from biom.util import biom_open
-
-__author__ = "Daniel McDonald"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Daniel McDonald", "Jai Ram Rideout",
- "Jose Antonio Navas Molina"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__author__ = "Daniel McDonald"
-__email__ = "daniel.mcdonald at colorado.edu"
-
-
-class TableSubsetter(Command):
- Axes = ['sample', 'observation']
-
- BriefDescription = "Subset a BIOM table"
- LongDescription = ("Subset a BIOM table, over either observations or "
- "samples, without fully parsing it. This command is "
- "intended to assist in working with very large tables "
- "when tight on memory, or as a lightweight way to "
- "subset a full table. Currently, it is possible to "
- "produce tables with rows or columns (observations or "
- "samples) that are fully zeroed.")
-
- CommandIns = ParameterCollection([
- CommandIn(Name='json_table_str', DataType=str,
- Description='the input BIOM table as an unparsed json '
- 'string',
- Required=False),
- CommandIn(Name='hdf5_table', DataType=str,
- Description='the fp to the input BIOM table',
- Required=False),
- CommandIn(Name='axis', DataType=str,
- Description='the axis to subset over, either ' +
- ' or '.join(Axes), Required=True),
- CommandIn(Name='ids', DataType=list,
- Description='the IDs to retain (either sample IDs or '
- 'observation IDs, depending on the axis)', Required=True)
- ])
-
- CommandOuts = ParameterCollection([
- CommandOut(Name='subsetted_table', DataType=tuple,
- Description='The subset generator')
- ])
-
- def run(self, **kwargs):
- json_table_str = kwargs['json_table_str']
- hdf5_biom = kwargs['hdf5_table']
- axis = kwargs['axis']
- ids = kwargs['ids']
-
- if axis not in self.Axes:
- raise CommandError("Invalid axis '%s'. Must be either %s." % (
- axis,
- ' or '.join(map(lambda e: "'%s'" % e, self.Axes))))
-
- if hdf5_biom is None and json_table_str is None:
- raise CommandError("Must specify an input table")
- elif hdf5_biom is not None and json_table_str is not None:
- raise CommandError("Can only specify one input table")
-
- if json_table_str is not None:
- idxs, new_axis_md = get_axis_indices(json_table_str, ids, axis)
- new_data = direct_slice_data(json_table_str, idxs, axis)
-
- # multiple walks over the string. bad form, but easy right now
- # ...should add a yield_and_ignore parser or something.
- def subset_generator():
- yield "{"
- yield direct_parse_key(json_table_str, "id")
- yield ","
- yield direct_parse_key(json_table_str, "format")
- yield ","
- yield direct_parse_key(json_table_str, "format_url")
- yield ","
- yield direct_parse_key(json_table_str, "type")
- yield ","
- yield direct_parse_key(json_table_str, "generated_by")
- yield ","
- yield direct_parse_key(json_table_str, "date")
- yield ","
- yield direct_parse_key(json_table_str, "matrix_type")
- yield ","
- yield direct_parse_key(json_table_str, "matrix_element_type")
- yield ","
- yield new_data
- yield ","
- yield new_axis_md
- yield ","
-
- if axis == "observation":
- yield direct_parse_key(json_table_str, "columns")
- else:
- yield direct_parse_key(json_table_str, "rows")
- yield "}"
-
- format_ = 'json'
- table = subset_generator()
- else:
- with biom_open(hdf5_biom) as f:
- table = Table.from_hdf5(f, ids=ids, axis=axis)
- format_ = 'hdf5'
-
- return {'subsetted_table': (table, format_)}
-
-CommandConstructor = TableSubsetter
diff --git a/biom/commands/table_summarizer.py b/biom/commands/table_summarizer.py
deleted file mode 100644
index e0f48cf..0000000
--- a/biom/commands/table_summarizer.py
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/usr/bin/env python
-
-# -----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# -----------------------------------------------------------------------------
-
-from __future__ import division
-from pyqi.core.command import (Command, CommandIn, CommandOut,
- ParameterCollection)
-
-from numpy import std
-from operator import itemgetter
-from biom.util import compute_counts_per_sample_stats
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Greg Caporaso", "Daniel McDonald", "Jose Antonio Navas Molina"]
-__license__ = "BSD"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-
-class TableSummarizer(Command):
-
- """
- Example usage:
- from biom.commands.table_summarizer import TableSummarizer
- from biom.parse import parse_biom_table
- c = TableSummarizer()
- table_f = open("table.biom")
- t = parse_biom_table(table_f)
- table_f.seek(0)
- result = c(table=(t,None))
- result = c(table=(t,None),qualitative=True)
- result = c(table=(t,table_f),qualitative=True)
- table_f.close()
- """
- BriefDescription = "Summarize sample or observation data in a BIOM table"
- LongDescription = ("Provides details on the observation counts per sample,"
- " including summary statistics, as well as metadata "
- "categories associated with samples and observations.")
-
- CommandIns = ParameterCollection([
- CommandIn(Name='table',
- DataType=tuple,
- Description='the input BIOM table',
- Required=True),
- CommandIn(Name='qualitative',
- DataType=bool,
- Description=('Present counts as number of unique '
- 'observation ids per sample, rather than '
- 'counts of observations per sample.'),
- Required=False,
- Default=False),
- CommandIn(Name='observations',
- DataType=bool,
- Default=False,
- Description=('Summarize over observations'))
- ])
-
- CommandOuts = ParameterCollection([
- CommandOut(Name='biom_summary',
- DataType=list,
- Description='The table summary')
- ])
-
- def run(self, **kwargs):
- result = {}
- qualitative = kwargs['qualitative']
- by_observations = kwargs['observations']
- table, table_lines = kwargs['table']
-
- if by_observations:
- table = table.transpose()
-
- min_counts, max_counts, median_counts, mean_counts, counts_per_samp =\
- compute_counts_per_sample_stats(table, qualitative)
- num_observations = len(table.ids(axis='observation'))
-
- counts_per_sample_values = counts_per_samp.values()
-
- if table.metadata() is None:
- sample_md_keys = ["None provided"]
- else:
- sample_md_keys = table.metadata()[0].keys()
-
- if table.metadata(axis='observation') is None:
- observation_md_keys = ["None provided"]
- else:
- observation_md_keys = table.metadata(axis='observation')[0].keys()
-
- lines = []
-
- num_samples = len(table.ids())
-
- if by_observations:
- # as this is a transpose of the original table...
- lines.append('Num samples: %d' % num_observations)
- lines.append('Num observations: %d' % num_samples)
- else:
- lines.append('Num samples: %d' % num_samples)
- lines.append('Num observations: %d' % num_observations)
-
- if not qualitative:
- total_count = sum(counts_per_sample_values)
- lines.append('Total count: %d' % total_count)
- lines.append('Table density (fraction of non-zero values): %1.3f' %
- table.get_table_density())
-
- lines.append('')
-
- if qualitative:
- if by_observations:
- lines.append('Sample/observations summary:')
- else:
- lines.append('Observations/sample summary:')
- else:
- lines.append('Counts/sample summary:')
-
- lines.append(' Min: %r' % min_counts)
- lines.append(' Max: %r' % max_counts)
- lines.append(' Median: %1.3f' % median_counts)
- lines.append(' Mean: %1.3f' % mean_counts)
- lines.append(' Std. dev.: %1.3f' % std(counts_per_sample_values))
-
- if by_observations:
- # since this is a transpose...
- lines.append(
- ' Sample Metadata Categories: %s' %
- '; '.join(observation_md_keys))
- lines.append(
- ' Observation Metadata Categories: %s' %
- '; '.join(sample_md_keys))
- lines.append('')
- else:
- lines.append(
- ' Sample Metadata Categories: %s' %
- '; '.join(sample_md_keys))
- lines.append(
- ' Observation Metadata Categories: %s' %
- '; '.join(observation_md_keys))
- lines.append('')
-
- if qualitative:
- lines.append('Observations/sample detail:')
- else:
- lines.append('Counts/sample detail:')
-
- for k, v in sorted(counts_per_samp.items(), key=itemgetter(1)):
- lines.append(' %s: %r' % (k, v))
-
- result['biom_summary'] = lines
- return result
-
-CommandConstructor = TableSummarizer
diff --git a/biom/err.py b/biom/err.py
index a25c536..7da399a 100644
--- a/biom/err.py
+++ b/biom/err.py
@@ -61,7 +61,6 @@ TableException: Empty table!
from warnings import warn
from sys import stdout
from contextlib import contextmanager
-import types
from biom.exception import TableException
diff --git a/biom/interfaces/__init__.py b/biom/interfaces/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/biom/interfaces/html/__init__.py b/biom/interfaces/html/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/biom/interfaces/html/config/__init__.py b/biom/interfaces/html/config/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/biom/interfaces/html/config/add_metadata.py b/biom/interfaces/html/config/add_metadata.py
deleted file mode 100644
index d400236..0000000
--- a/biom/interfaces/html/config/add_metadata.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-
-# -----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# -----------------------------------------------------------------------------
-
-from pyqi.core.interfaces.html import (HTMLInputOption, HTMLDownload)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.html.output_handler import newline_list_of_strings
-from pyqi.core.interfaces.optparse.input_handler import string_list_handler
-
-from biom.interfaces.html.input_handler import load_biom_table
-from biom.commands.metadata_adder import CommandConstructor
-
-__author__ = "Evan Bolyen"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = [
- "Evan Bolyen", "Jai Ram Rideout", "Greg Caporaso", "Morgan Langille",
- "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Evan Bolyen"
-__email__ = "ebolyen at gmail.com"
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-
-inputs = [
- HTMLInputOption(Parameter=cmd_in_lookup('table'),
- Type='upload_file',
- Handler=load_biom_table,
- Name='input-fp'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('sample_metadata'),
- Type='upload_file',
- Name='sample-metadata-fp'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('observation_metadata'),
- Type='upload_file',
- Name='observation-metadata-fp'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('sc_separated'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to '
- 'split on semicolons. This is useful for hierarchical '
- 'data such as taxonomy or functional categories'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('sc_pipe_separated'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to split'
- ' on semicolons and pipes ("|"). This is useful for '
- 'hierarchical data such as functional categories with '
- 'one-to-many mappings (e.g. x;y;z|x;y;w)'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('int_fields'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to cast '
- 'to integers. This is useful for integer data such as '
- '"DaysSinceStart"'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('float_fields'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to cast '
- 'to floating point numbers. This is useful for real number'
- ' data such as "pH"'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('sample_header'),
- Handler=string_list_handler,
- Help='comma-separated list of the sample metadata field '
- 'names. This is useful if a header line is not provided '
- 'with the metadata, if you want to rename the fields, or '
- 'if you want to include only the first n fields where n is'
- ' the number of entries provided here'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('observation_header'),
- Handler=string_list_handler,
- Help='comma-separated list of the observation metadata '
- 'field names. This is useful if a header line is not '
- 'provided with the metadata, if you want to rename the '
- 'fields, or if you want to include only the first n fields'
- ' where n is the number of entries provided here'),
- HTMLInputOption(Parameter=None,
- Name='download-file',
- Required=True,
- Help='the download file')
-]
-
-outputs = [
- HTMLDownload(Parameter=cmd_out_lookup('table'),
- Handler=newline_list_of_strings,
- FilenameLookup='download-file',
- FileExtension='.biom')
-]
diff --git a/biom/interfaces/html/config/convert.py b/biom/interfaces/html/config/convert.py
deleted file mode 100644
index c2d119f..0000000
--- a/biom/interfaces/html/config/convert.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-
-# -----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# -----------------------------------------------------------------------------
-
-__author__ = "Evan Bolyen"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = [
- "Evan Bolyen",
- "Jai Ram Rideout",
- "Greg Caporaso",
- "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Evan Bolyen"
-__email__ = "ebolyen at gmail.com"
-
-from pyqi.core.interfaces.html import (HTMLInputOption, HTMLDownload)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.html.output_handler import newline_list_of_strings
-
-from biom.interfaces.html.input_handler import load_metadata
-from biom.commands.table_converter import CommandConstructor
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-
-inputs = [
- HTMLInputOption(Parameter=cmd_in_lookup('table'),
- Type='upload_file',
- Help='the input table filepath, either in BIOM or classic '
- 'format'),
- HTMLInputOption(Parameter=cmd_in_lookup('to_tsv'),
- Type=bool),
- HTMLInputOption(Parameter=cmd_in_lookup('to_json'),
- Type=bool),
- HTMLInputOption(Parameter=cmd_in_lookup('to_hdf5'),
- Type=bool),
- HTMLInputOption(Parameter=cmd_in_lookup('sample_metadata'),
- Type='upload_file',
- Handler=load_metadata),
- HTMLInputOption(Parameter=cmd_in_lookup('observation_metadata'),
- Type='upload_file',
- Handler=load_metadata),
- HTMLInputOption(Parameter=cmd_in_lookup('header_key')),
- HTMLInputOption(Parameter=cmd_in_lookup('output_metadata_id')),
- HTMLInputOption(Parameter=cmd_in_lookup('process_obs_metadata'),
- Type='multiple_choice',
- Choices=['taxonomy', 'naive', 'sc_separated'],
- Help='Process metadata associated with observations when '
- 'converting from a classic table'),
- HTMLInputOption(Parameter=cmd_in_lookup('tsv_metadata_formatter'),
- Type='multiple_choice',
- Choices=['naive', 'sc_separated'],
- Help='Format the metadata for TSV output'),
- HTMLInputOption(Parameter=None,
- Name='download-file',
- Required=True,
- Help='the download file')
-]
-
-outputs = [
- HTMLDownload(Parameter=cmd_out_lookup('table'),
- Handler=newline_list_of_strings,
- FilenameLookup='download-file',
- FileExtension='.biom')
-]
diff --git a/biom/interfaces/html/config/show_install_info.py b/biom/interfaces/html/config/show_install_info.py
deleted file mode 100644
index f2c8c00..0000000
--- a/biom/interfaces/html/config/show_install_info.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-
-# -----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# -----------------------------------------------------------------------------
-
-__author__ = "Evan Bolyen"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = [
- "Evan Bolyen",
- "Jai Ram Rideout",
- "Greg Caporaso",
- "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Evan Bolyen"
-__email__ = "ebolyen at gmail.com"
-
-from pyqi.core.interfaces.html import HTMLPage
-from pyqi.core.command import make_command_out_collection_lookup_f
-from pyqi.core.interfaces.html.output_handler import html_list_of_strings
-from biom.commands.installation_informer import CommandConstructor
-
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-
-inputs = []
-
-outputs = [
- HTMLPage(Parameter=cmd_out_lookup('install_info_lines'),
- Handler=html_list_of_strings)
-]
diff --git a/biom/interfaces/html/config/summarize_table.py b/biom/interfaces/html/config/summarize_table.py
deleted file mode 100644
index 89619e1..0000000
--- a/biom/interfaces/html/config/summarize_table.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-
-# -----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# -----------------------------------------------------------------------------
-
-__author__ = "Evan Bolyen"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = [
- "Evan Bolyen",
- "Greg Caporaso",
- "Jai Ram Rideout",
- "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Evan Bolyen"
-__email__ = "ebolyen at gmail.com"
-
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.html.output_handler import newline_list_of_strings
-from pyqi.core.interfaces.html import (HTMLInputOption, HTMLDownload)
-from biom.commands.table_summarizer import CommandConstructor
-from biom.interfaces.html.input_handler import (
- load_biom_table_with_file_contents
- )
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-inputs = [
- HTMLInputOption(Parameter=cmd_in_lookup('table'),
- Type="upload_file",
- Handler=load_biom_table_with_file_contents,
- Name='input-fp'),
- HTMLInputOption(Parameter=cmd_in_lookup('qualitative'),
- Type=bool),
- HTMLInputOption(Parameter=None,
- Name='download-file',
- Required=True,
- Help='the download file')
-]
-
-outputs = [
- HTMLDownload(Parameter=cmd_out_lookup('biom_summary'),
- Handler=newline_list_of_strings,
- FilenameLookup='download-file',
- FileExtension='.biom.summary.txt')
-]
diff --git a/biom/interfaces/html/config/validate_table.py b/biom/interfaces/html/config/validate_table.py
deleted file mode 100644
index 0e04f09..0000000
--- a/biom/interfaces/html/config/validate_table.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-
-# -----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# -----------------------------------------------------------------------------
-
-__author__ = "Evan Bolyen"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Evan Bolyen", "Jai Ram Rideout", "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Evan Bolyen"
-__email__ = "ebolyen at gmail.com"
-
-from pyqi.core.interfaces.html import (HTMLInputOption, HTMLPage)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from biom.commands.table_validator import CommandConstructor
-from biom.interfaces.html.input_handler import load_json_document
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-
-def display_table_validity(result_key, data, option_value=None):
- if data is None:
- return "The input file is a valid BIOM-formatted file."
- else:
- return (
- "<br/>".join(["The input file is not a valid BIOM-formatted file."]
- + data)
- )
-
-
-inputs = [
- HTMLInputOption(Parameter=cmd_in_lookup('table'),
- Type='upload_file',
- Handler=load_json_document,
- Name='input-fp',
- Help='the input file to validate against the BIOM '
- 'format specification'),
-
- HTMLInputOption(Parameter=cmd_in_lookup('format_version')),
-
- HTMLInputOption(Parameter=cmd_in_lookup('detailed_report'), Type=bool),
-
- HTMLInputOption(Parameter=cmd_in_lookup('is_json'), Type=bool)
-]
-
-outputs = [
- HTMLPage(Parameter=cmd_out_lookup('report_lines'),
- Handler=display_table_validity)
-]
diff --git a/biom/interfaces/html/input_handler.py b/biom/interfaces/html/input_handler.py
deleted file mode 100644
index a849815..0000000
--- a/biom/interfaces/html/input_handler.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-__author__ = "Evan Bolyen"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Evan Bolyen", "Greg Caporaso", "Jai Ram Rideout"]
-__license__ = "BSD"
-__maintainer__ = "Evan Bolyen"
-__email__ = "ebolyen at gmail.com"
-
-import json
-from biom.parse import MetadataMap, parse_biom_table
-
-
-def load_biom_table(table_f):
- """Return a parsed BIOM table."""
- return parse_biom_table(table_f)
-
-
-def load_biom_table_with_file_contents(biom_f):
- """Return a BIOM table and the original open filehandle as a tuple.
-
- Useful when additional computation needs to be performed on the file
- contents, such as an MD5 sum.
-
- WARNING: this function does not close the open filehandle that it returns.
- Users of this function are responsible for closing the filehandle when done
- using it!
- """
- table = parse_biom_table(biom_f)
- if hasattr(biom_f, 'seek'):
- biom_f.seek(0)
- return table, biom_f
-
-
-def load_json_document(f):
- """Return a parsed JSON object."""
- return json.load(f)
-
-
-def load_metadata(lines):
- """Parse a sample/observation metadata file, return a ``MetadataMap``.
-
- If ``lines`` is ``None``, this function will return ``None``.
- """
- if lines is not None:
- return MetadataMap.from_file(lines)
-
- return None
diff --git a/biom/interfaces/optparse/__init__.py b/biom/interfaces/optparse/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/biom/interfaces/optparse/config/__init__.py b/biom/interfaces/optparse/config/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/biom/interfaces/optparse/config/add_metadata.py b/biom/interfaces/optparse/config/add_metadata.py
deleted file mode 100644
index 05f151a..0000000
--- a/biom/interfaces/optparse/config/add_metadata.py
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from pyqi.core.interfaces.optparse import (OptparseOption,
- OptparseUsageExample,
- OptparseResult)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.optparse.input_handler import (file_reading_handler,
- string_list_handler)
-from biom.commands.metadata_adder import CommandConstructor
-from biom.interfaces.optparse.input_handler import load_biom_table
-from biom.interfaces.optparse.output_handler import write_biom_table
-
-__author__ = "Jai Ram Rideout"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Jai Ram Rideout", "Greg Caporaso", "Morgan Langille",
- "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Jai Ram Rideout"
-__email__ = "jai.rideout at gmail.com"
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-usage_examples = [
- OptparseUsageExample(ShortDesc="Adding sample metadata",
- LongDesc="Add sample metadata to a BIOM table",
- Ex="%prog -i otu_table.biom -o "
- "table_with_sample_metadata.biom -m "
- "sample_metadata.txt")
-]
-
-inputs = [
- OptparseOption(Parameter=cmd_in_lookup('table'),
- Type='existing_filepath',
- Handler=load_biom_table, ShortName='i',
- Name='input-fp'),
-
- OptparseOption(Parameter=cmd_in_lookup('sample_metadata'),
- Type='existing_filepath',
- Handler=file_reading_handler, ShortName='m',
- Name='sample-metadata-fp'),
-
- OptparseOption(Parameter=cmd_in_lookup('observation_metadata'),
- Type='existing_filepath',
- Handler=file_reading_handler,
- Name='observation-metadata-fp'),
-
- OptparseOption(Parameter=cmd_in_lookup('sc_separated'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to split '
- 'on semicolons. This is useful for hierarchical data such '
- 'as taxonomy or functional categories'),
-
- OptparseOption(Parameter=cmd_in_lookup('sc_pipe_separated'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to split '
- 'on semicolons and pipes ("|"). This is useful for '
- 'hierarchical data such as functional categories with '
- 'one-to-many mappings (e.g. x;y;z|x;y;w)'),
-
- OptparseOption(Parameter=cmd_in_lookup('int_fields'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to cast '
- 'to integers. This is useful for integer data such as '
- '"DaysSinceStart"'),
-
- OptparseOption(Parameter=cmd_in_lookup('float_fields'),
- Handler=string_list_handler,
- Help='comma-separated list of the metadata fields to cast '
- 'to floating point numbers. This is useful for real number '
- 'data such as "pH"'),
-
- OptparseOption(Parameter=cmd_in_lookup('sample_header'),
- Handler=string_list_handler,
- Help='comma-separated list of the sample metadata field '
- 'names. This is useful if a header line is not provided '
- 'with the metadata, if you want to rename the fields, or '
- 'if you want to include only the first n fields where n is '
- 'the number of entries provided here'),
-
- OptparseOption(Parameter=cmd_in_lookup('observation_header'),
- Handler=string_list_handler,
- Help='comma-separated list of the observation metadata '
- 'field names. This is useful if a header line is not '
- 'provided with the metadata, if you want to rename the '
- 'fields, or if you want to include only the first n fields '
- 'where n is the number of entries provided here'),
-
- OptparseOption(Parameter=cmd_in_lookup('output_as_json'),
- Type=None,
- Action='store_true'),
-
- OptparseOption(Parameter=None, Type='new_filepath', ShortName='o',
- Name='output-fp', Required=True,
- Help='the output BIOM table')
-]
-
-outputs = [
- OptparseResult(Parameter=cmd_out_lookup('table'),
- Handler=write_biom_table,
- InputName='output-fp')
-]
diff --git a/biom/interfaces/optparse/config/convert.py b/biom/interfaces/optparse/config/convert.py
deleted file mode 100644
index 43e67a0..0000000
--- a/biom/interfaces/optparse/config/convert.py
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-__author__ = "Jai Ram Rideout"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Jai Ram Rideout", "Greg Caporaso", "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Jai Ram Rideout"
-__email__ = "jai.rideout at gmail.com"
-
-from pyqi.core.interfaces.optparse import (OptparseUsageExample,
- OptparseOption, OptparseResult)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from biom.interfaces.optparse.input_handler import (load_biom_table,
- load_metadata)
-from biom.interfaces.optparse.output_handler import write_biom_table
-from biom.commands.table_converter import CommandConstructor
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-usage_examples = [
- OptparseUsageExample(ShortDesc="Converting from classic to BIOM format",
- LongDesc="Convert the classic file table.txt to a "
- "HDF5 BIOM format OTU table",
- Ex='%prog -i table.txt -o table.biom '
- '--table-type "OTU table" --to-hdf5')
-]
-
-inputs = [
- OptparseOption(Parameter=cmd_in_lookup('table'),
- Type='existing_filepath',
- Handler=load_biom_table,
- ShortName='i', Name='input-fp',
- Help='the input table filepath, either in BIOM or classic '
- 'format'),
- OptparseOption(Parameter=cmd_in_lookup('sample_metadata'),
- Type='existing_filepath',
- Handler=load_metadata,
- ShortName='m',
- Name='sample-metadata-fp'),
- OptparseOption(Parameter=cmd_in_lookup('observation_metadata'),
- Type='existing_filepath',
- Handler=load_metadata, Name='observation-metadata-fp'),
- OptparseOption(Parameter=cmd_in_lookup('header_key')),
- OptparseOption(Parameter=cmd_in_lookup('output_metadata_id')),
- OptparseOption(Parameter=cmd_in_lookup('process_obs_metadata')),
- OptparseOption(Parameter=cmd_in_lookup('table_type')),
- OptparseOption(Parameter=cmd_in_lookup('tsv_metadata_formatter')),
- OptparseOption(Parameter=cmd_in_lookup('to_json'),
- Action='store_true'),
- OptparseOption(Parameter=cmd_in_lookup('to_tsv'),
- Action='store_true'),
- OptparseOption(Parameter=cmd_in_lookup('to_hdf5'),
- Action='store_true'),
- OptparseOption(Parameter=cmd_in_lookup('collapsed_samples'),
- Action='store_true'),
- OptparseOption(Parameter=cmd_in_lookup('collapsed_observations'),
- Action='store_true'),
- OptparseOption(Parameter=None,
- Type='new_filepath',
- ShortName='o',
- Name='output-fp',
- Required=True,
- Help='the output filepath')
-]
-
-outputs = [
- OptparseResult(Parameter=cmd_out_lookup('table'),
- Handler=write_biom_table,
- InputName='output-fp')
-]
diff --git a/biom/interfaces/optparse/config/show_install_info.py b/biom/interfaces/optparse/config/show_install_info.py
deleted file mode 100644
index 88bbaa7..0000000
--- a/biom/interfaces/optparse/config/show_install_info.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from pyqi.core.interfaces.optparse import (OptparseUsageExample,
- OptparseResult)
-from pyqi.core.command import make_command_out_collection_lookup_f
-from pyqi.core.interfaces.optparse.output_handler import print_list_of_strings
-from biom.commands.installation_informer import CommandConstructor
-
-__author__ = "Jai Ram Rideout"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Jai Ram Rideout", "Greg Caporaso", "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Jai Ram Rideout"
-__email__ = "jai.rideout at gmail.com"
-
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-usage_examples = [
- OptparseUsageExample(ShortDesc="Displaying installation info",
- LongDesc="Display biom-format installation "
- "information",
- Ex="%prog")
-]
-
-inputs = []
-
-outputs = [
- OptparseResult(Parameter=cmd_out_lookup('install_info_lines'),
- Handler=print_list_of_strings)
-]
diff --git a/biom/interfaces/optparse/config/subset_table.py b/biom/interfaces/optparse/config/subset_table.py
deleted file mode 100644
index eb1b1e4..0000000
--- a/biom/interfaces/optparse/config/subset_table.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from pyqi.core.interfaces.optparse import (OptparseOption,
- OptparseUsageExample,
- OptparseResult)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.optparse.input_handler import load_file_lines
-from biom.interfaces.optparse.input_handler import biom_load_file_contents
-from biom.interfaces.optparse.output_handler import write_subsetted_biom_table
-from biom.commands.table_subsetter import CommandConstructor
-
-__author__ = "Jai Ram Rideout"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Jai Ram Rideout", "Daniel McDonald",
- "Jose Antonio Navas Molina"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Jai Ram Rideout"
-__email__ = "jai.rideout at gmail.com"
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-
-usage_examples = [
- OptparseUsageExample(ShortDesc="Subsetting a json BIOM table",
- LongDesc="Choose a subset of the observations in "
- "table.biom and write them to subset.biom",
- Ex="%prog -j table.biom -a observations -s "
- "observation_ids.txt -o subset.biom"),
- OptparseUsageExample(ShortDesc="Subsetting a hdf5 BIOM table",
- LongDesc="Choose a subset of the observations in "
- "table.biom and write them to subset.biom",
- Ex="%prog -i table.biom -a observations -s "
- "observation_ids.txt -o subset.biom")
-]
-
-inputs = [
- OptparseOption(Parameter=cmd_in_lookup('hdf5_table'),
- Type='existing_filepath',
- Handler=None, ShortName='i',
- Name='input-hdf5-fp',
- Help='the input hdf5 BIOM table filepath to subset'),
-
- OptparseOption(Parameter=cmd_in_lookup('json_table_str'),
- Type='existing_filepath',
- Handler=biom_load_file_contents, ShortName='j',
- Name='input-json-fp',
- Help='the input hdf5 BIOM table filepath to subset'),
-
- OptparseOption(Parameter=cmd_in_lookup('axis'), ShortName='a'),
-
- OptparseOption(Parameter=cmd_in_lookup('ids'),
- Type='existing_filepath', Handler=load_file_lines,
- ShortName='s', Help='a file containing a single column of '
- 'IDs to retain (either sample IDs or observation IDs, '
- 'depending on the axis)'),
-
- OptparseOption(Parameter=None, Type='new_filepath', ShortName='o',
- Name='output-fp', Required=True,
- Help='the output BIOM table filepath'),
-]
-
-outputs = [
- OptparseResult(Parameter=cmd_out_lookup('subsetted_table'),
- Handler=write_subsetted_biom_table,
- InputName='output-fp')
-]
diff --git a/biom/interfaces/optparse/config/summarize_table.py b/biom/interfaces/optparse/config/summarize_table.py
deleted file mode 100644
index 12fe33f..0000000
--- a/biom/interfaces/optparse/config/summarize_table.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Greg Caporaso", "Jai Ram Rideout", "Daniel McDonald"]
-__license__ = "BSD"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.optparse import (OptparseOption,
- OptparseUsageExample,
- OptparseResult)
-from pyqi.core.interfaces.optparse.output_handler import (
- write_or_print_list_of_strings)
-from biom.commands.table_summarizer import CommandConstructor
-from biom.interfaces.optparse.input_handler import (
- load_biom_table_with_file_contents)
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-usage_examples = [
- OptparseUsageExample(ShortDesc="Basic script usage",
- LongDesc="Write a summary of table.biom to "
- "table_summary.txt",
- Ex="%prog -i table.biom -o table_summary.txt")
-]
-
-inputs = [
- OptparseOption(Parameter=cmd_in_lookup('table'),
- Type="existing_filepath",
- Handler=load_biom_table_with_file_contents,
- ShortName='i',
- Name='input-fp'),
- OptparseOption(Parameter=cmd_in_lookup('qualitative'),
- Type=None,
- Action="store_true"),
- OptparseOption(Parameter=cmd_in_lookup('observations'),
- Type=None,
- Action='store_true'),
- OptparseOption(Parameter=None,
- Type='new_filepath',
- ShortName='o',
- Name='output-fp',
- Required=False,
- Default=None,
- Help='the output filepath')
-]
-
-outputs = [
- OptparseResult(Parameter=cmd_out_lookup('biom_summary'),
- Handler=write_or_print_list_of_strings,
- InputName='output-fp')
-]
diff --git a/biom/interfaces/optparse/config/validate_table.py b/biom/interfaces/optparse/config/validate_table.py
deleted file mode 100644
index 2a479b9..0000000
--- a/biom/interfaces/optparse/config/validate_table.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-import sys
-from pyqi.core.interfaces.optparse import (OptparseOption,
- OptparseUsageExample,
- OptparseResult)
-from pyqi.core.command import (make_command_in_collection_lookup_f,
- make_command_out_collection_lookup_f)
-from pyqi.core.interfaces.optparse.output_handler import print_list_of_strings
-from biom.commands.table_validator import CommandConstructor
-from biom.interfaces.optparse.input_handler import load_hdf5_or_json
-
-__author__ = "Jai Ram Rideout"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Jai Ram Rideout", "Daniel McDonald"]
-__license__ = "BSD"
-__url__ = "http://biom-format.org"
-__maintainer__ = "Jai Ram Rideout"
-__email__ = "jai.rideout at gmail.com"
-
-cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
-cmd_out_lookup = make_command_out_collection_lookup_f(CommandConstructor)
-
-
-def report_table_validity(result_key, data, option_value=None):
- if data:
- print "The input file is a valid BIOM-formatted file."
- sys.exit(0)
- else:
- print "The input file is not a valid BIOM-formatted file."
- sys.exit(1)
-
-usage_examples = [
- OptparseUsageExample(ShortDesc="Validating a BIOM file",
- LongDesc="Validate the contents of table.biom for "
- "adherence to the BIOM format specification",
- Ex="%prog -i table.biom")
-]
-
-inputs = [
- OptparseOption(Parameter=cmd_in_lookup('table'),
- Type='existing_filepath',
- Handler=None, ShortName='i',
- Name='input-fp',
- Help='the input filepath to validate against the BIOM '
- 'format specification'),
- OptparseOption(Parameter=cmd_in_lookup('is_json'),
- Type=None,
- Action='store_true'),
- OptparseOption(Parameter=cmd_in_lookup('format_version'), ShortName='f'),
-
- OptparseOption(Parameter=cmd_in_lookup('detailed_report'), Type=None,
- Action='store_true')
-]
-
-outputs = [
- OptparseResult(Parameter=cmd_out_lookup('report_lines'),
- Handler=print_list_of_strings),
- OptparseResult(Parameter=cmd_out_lookup('valid_table'),
- Handler=report_table_validity)
-]
diff --git a/biom/interfaces/optparse/input_handler.py b/biom/interfaces/optparse/input_handler.py
deleted file mode 100644
index 6386ec7..0000000
--- a/biom/interfaces/optparse/input_handler.py
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Greg Caporaso", "Jai Ram Rideout"]
-__license__ = "BSD"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-import json
-from biom.util import biom_open
-from biom.parse import MetadataMap, parse_biom_table
-from pyqi.core.interfaces.optparse.input_handler import load_file_contents
-
-
-def biom_load_file_contents(fp):
- if fp is None:
- return fp
- return load_file_contents(fp)
-
-
-def load_hdf5_or_json(fp):
- """Return a parsed JSON object or an HDF5 object"""
- with biom_open(fp) as f:
- if hasattr(f, 'seek'):
- return json.load(f)
- else:
- return f
-
-
-def load_biom_table(biom_fp):
- """Return a parsed BIOM table."""
- with biom_open(biom_fp, 'U') as table_f:
- return parse_biom_table(table_f)
-
-
-def load_biom_table_with_file_contents(biom_fp):
- """Return a BIOM table and the original open filehandle as a tuple.
-
- Useful when additional computation needs to be performed on the file
- contents, such as an MD5 sum.
-
- WARNING: this function does not close the open filehandle that it returns.
- Users of this function are responsible for closing the filehandle when done
- using it!
- """
- with biom_open(biom_fp, 'U') as biom_f:
- table = parse_biom_table(biom_f)
-
- if hasattr(biom_f, 'seek'):
- biom_f.seek(0)
-
- return table, biom_f
-
-
-def load_json_document(fp):
- """Return a parsed JSON object."""
- with biom_open(fp, 'U') as f:
- return json.load(f)
-
-
-def load_metadata(fp):
- """Parse a sample/observation metadata file, return a ``MetadataMap``.
-
- If ``fp`` is ``None``, this function will return ``None``.
- """
- if fp is None:
- return None
- else:
- with open(fp, 'U') as f:
- return MetadataMap.from_file(f)
diff --git a/biom/interfaces/optparse/output_handler.py b/biom/interfaces/optparse/output_handler.py
deleted file mode 100644
index a34ec57..0000000
--- a/biom/interfaces/optparse/output_handler.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-# ----------------------------------------------------------------------------
-
-__author__ = "Greg Caporaso"
-__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-__credits__ = ["Greg Caporaso", "Jai Ram Rideout"]
-__license__ = "BSD"
-__maintainer__ = "Greg Caporaso"
-__email__ = "gregcaporaso at gmail.com"
-
-from os.path import exists
-from pyqi.core.exception import IncompetentDeveloperError
-from pyqi.core.interfaces.optparse.output_handler import write_list_of_strings
-from biom.parse import generatedby
-from biom.util import HAVE_H5PY
-
-
-def write_subsetted_biom_table(result_key, data, option_value=None):
- """Write a string to a file"""
- if option_value is None:
- raise IncompetentDeveloperError("Cannot write output without a "
- "filepath.")
-
- if exists(option_value):
- raise IOError("Output path '%s' already exists." % option_value)
-
- table, fmt = data
-
- if fmt not in ['hdf5', 'json']:
- raise IncompetentDeveloperError("Unknown file format")
-
- if fmt == 'json':
- write_list_of_strings(result_key, table, option_value)
- else:
- if HAVE_H5PY:
- import h5py
- else:
- # This should never be raised here
- raise ImportError("h5py is not available, cannot write HDF5!")
-
- with h5py.File(option_value, 'w') as f:
- table.to_hdf5(f, generatedby())
-
-
-def write_biom_table(result_key, data, option_value=None):
- """Write a string to a file"""
- if option_value is None:
- raise IncompetentDeveloperError("Cannot write output without a "
- "filepath.")
-
- if exists(option_value):
- raise IOError("Output path '%s' already exists." % option_value)
-
- table, fmt = data
-
- if fmt not in ['hdf5', 'json', 'tsv']:
- raise IncompetentDeveloperError("Unknown file format")
-
- if fmt == 'json':
- with open(option_value, 'w') as f:
- f.write(table.to_json(generatedby()))
- elif fmt == 'tsv':
- with open(option_value, 'w') as f:
- f.write(table)
- f.write('\n')
- else:
- if HAVE_H5PY:
- import h5py
- else:
- raise ImportError("h5py is not available, cannot write HDF5!")
-
- with h5py.File(option_value, 'w') as f:
- table.to_hdf5(f, generatedby())
diff --git a/biom/parse.py b/biom/parse.py
index 14e4a41..7cbb243 100644
--- a/biom/parse.py
+++ b/biom/parse.py
@@ -9,13 +9,15 @@
# ----------------------------------------------------------------------------
from __future__ import division
-from string import maketrans
+
import numpy as np
-from biom import __version__
+from future.utils import string_types
+
from biom.exception import BiomParseException, UnknownAxisError
from biom.table import Table
-from biom.util import biom_open
+from biom.util import biom_open, __version__
import json
+import collections
__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
@@ -27,8 +29,8 @@ __url__ = "http://biom-format.org"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald at colorado.edu"
-MATRIX_ELEMENT_TYPE = {'int': int, 'float': float, 'unicode': unicode,
- u'int': int, u'float': float, u'unicode': unicode}
+MATRIX_ELEMENT_TYPE = {'int': int, 'float': float, 'unicode': str,
+ u'int': int, u'float': float, u'unicode': str}
QUOTE = '"'
JSON_OPEN = set(["[", "{"])
@@ -125,7 +127,7 @@ def direct_slice_data(biom_str, to_keep, axis):
# determine shape
raw_shape = shape_kv_pair.split(':')[-1].replace("[", "").replace("]", "")
- n_rows, n_cols = map(int, raw_shape.split(","))
+ n_rows, n_cols = list(map(int, raw_shape.split(",")))
# slice to just data
data_start = data_fields.find('[') + 1
@@ -157,18 +159,20 @@ def direct_slice_data(biom_str, to_keep, axis):
return '"data": %s, "shape": %s' % (new_data, new_shape)
-STRIP_F = lambda x: x.strip("[] \n\t")
+
+def strip_f(x):
+ return x.strip("[] \n\t")
def _remap_axis_sparse_obs(rcv, lookup):
"""Remap a sparse observation axis"""
- row, col, value = map(STRIP_F, rcv.split(','))
+ row, col, value = list(map(strip_f, rcv.split(',')))
return "%s,%s,%s" % (lookup[row], col, value)
def _remap_axis_sparse_samp(rcv, lookup):
"""Remap a sparse sample axis"""
- row, col, value = map(STRIP_F, rcv.split(','))
+ row, col, value = list(map(strip_f, rcv.split(',')))
return "%s,%s,%s" % (row, lookup[col], value)
@@ -182,7 +186,7 @@ def _direct_slice_data_sparse_obs(data, to_keep):
new_data = []
remap_lookup = dict([(str(v), i) for i, v in enumerate(sorted(to_keep))])
for rcv in data.split('],'):
- r, c, v = STRIP_F(rcv).split(',')
+ r, c, v = strip_f(rcv).split(',')
if r in remap_lookup:
new_data.append(_remap_axis_sparse_obs(rcv, remap_lookup))
return '[[%s]]' % '],['.join(new_data)
@@ -244,6 +248,98 @@ def get_axis_indices(biom_str, to_keep, axis):
return idxs, json.dumps(subset)[1:-1] # trim off { and }
+def parse_uc(fh):
+ """ Create a Table object from a uclust/usearch/vsearch uc file.
+
+ Parameters
+ ----------
+ fh : file handle
+ The ``.uc`` file to be parsed.
+
+ Returns
+ -------
+ biom.Table : The resulting BIOM table.
+
+ Raises
+ ------
+ ValueError
+ If a sequence identifier is encountered that doesn't have at least
+ one underscore in it (see Notes).
+
+ Notes
+ -----
+ This function assumes sequence identifiers in this file are in QIIME's
+ "post-split-libraries" format, where the identifiers are of the form
+ ``<sample-id>_<sequence-id>``. Everything before the first underscore
+ will be used as the sample identifier in the resulting ``Table``.
+ The information after the first underscore is not used directly, though
+ the full identifiers of seeds will be used as the observation
+ identifier in the resulting ``Table``.
+
+ """
+ data = collections.defaultdict(int)
+ sample_idxs = {}
+ sample_ids = []
+ observation_idxs = {}
+ observation_ids = []
+ # The types of hit lines we need here are hit (H), seed (S) and
+ # library seed (L). Store these in a set for quick reference.
+ line_types = set('HSL')
+ for line in fh:
+ # determine if the current line is one that we need
+ line = line.strip()
+ if not line:
+ continue
+ fields = line.split('\t')
+
+ line_type = fields[0]
+ if line_type not in line_types:
+ continue
+
+ # grab the fields we care about
+ observation_id = fields[9].split()[0]
+ query_id = fields[8].split()[0]
+
+ if observation_id == '*':
+ # S and L lines don't have a separate observation id
+ observation_id = query_id
+
+ # get the index of the current observation id, or create it if it's
+ # the first time we're seeing this id
+ if observation_id in observation_idxs:
+ observation_idx = observation_idxs[observation_id]
+ else:
+ observation_idx = len(observation_ids)
+ observation_ids.append(observation_id)
+ observation_idxs[observation_id] = observation_idx
+
+ if line_type == 'H' or line_type == 'S':
+ # get the sample id
+ try:
+ underscore_index = query_id.index('_')
+ except ValueError:
+ raise ValueError(
+ "A query sequence was encountered that does not have an "
+ "underscore. An underscore is required in all query "
+ "sequence identifiers to indicate the sample identifier.")
+ # get the sample id and its index, creating the index if it is the
+ # first time we're seeing this id
+ sample_id = query_id[:underscore_index]
+ if sample_id in sample_idxs:
+ sample_idx = sample_idxs[sample_id]
+ else:
+ sample_idx = len(sample_ids)
+ sample_idxs[sample_id] = sample_idx
+ sample_ids.append(sample_id)
+ # increment the count of the current observation in the current
+ # sample by one.
+ data[(observation_idx, sample_idx)] += 1
+ else:
+ # nothing else needs to be done for 'L' records
+ pass
+ return Table(data, observation_ids=observation_ids, sample_ids=sample_ids)
+
+
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
r"""Parses the biom table stored in the filepath `fp`
@@ -297,12 +393,17 @@ def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
return Table.from_hdf5(fp, ids=ids, axis=axis)
except:
pass
-
if hasattr(fp, 'read'):
old_pos = fp.tell()
- try:
+ # Read in characters until first non-whitespace
+ # If it is a {, then this is (most likely) JSON
+ c = fp.read(1)
+ while c.isspace():
+ c = fp.read(1)
+ if c == '{':
+ fp.seek(old_pos)
t = Table.from_json(json.load(fp), input_is_dense=input_is_dense)
- except ValueError:
+ else:
fp.seek(old_pos)
t = Table.from_tsv(fp, None, None, lambda x: x)
elif isinstance(fp, list):
@@ -314,12 +415,16 @@ def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
else:
t = Table.from_json(json.loads(fp), input_is_dense=input_is_dense)
+ def subset_ids(data, id_, md):
+ return id_ in ids
+
+ def gt_zero(vals, id_, md):
+ return np.any(vals)
+
if ids is not None:
- f = lambda data, id_, md: id_ in ids
- t.filter(f, axis=axis)
+ t.filter(subset_ids, axis=axis)
axis = 'observation' if axis == 'sample' else 'sample'
- f = lambda vals, id_, md: np.any(vals)
- t.filter(f, axis=axis)
+ t.filter(gt_zero, axis=axis)
return t
@@ -371,18 +476,22 @@ class MetadataMap(dict):
if strip_quotes:
if suppress_stripping:
- # remove quotes but not spaces
- strip_f = lambda x: x.replace('"', '')
+ def strip_f(x):
+ # remove quotes but not spaces
+ return x.replace('"', '')
else:
- # remove quotes and spaces
- strip_f = lambda x: x.replace('"', '').strip()
+ def strip_f(x):
+ # remove quotes and spaces
+ return x.replace('"', '').strip()
else:
if suppress_stripping:
- # don't remove quotes or spaces
- strip_f = lambda x: x
+ def strip_f(x):
+ # don't remove quotes or spaces
+ return x
else:
- # remove spaces but not quotes
- strip_f = lambda x: x.strip()
+ def strip_f(x):
+ # remove spaces but not quotes
+ return x.strip()
# if the user didn't provide process functions, initialize as
# an empty dict
@@ -409,7 +518,7 @@ class MetadataMap(dict):
comments.append(line)
else:
# Will add empty string to empty fields
- tmp_line = map(strip_f, line.split('\t'))
+ tmp_line = list(map(strip_f, line.split('\t')))
if len(tmp_line) < len(header):
tmp_line.extend([''] * (len(header) - len(tmp_line)))
mapping_data.append(tmp_line)
@@ -473,11 +582,10 @@ def biom_meta_to_string(metadata, replace_str=':'):
# Note that since ';' and '|' are used as seperators we must replace them
# if they exist
- # metadata is just a string (not a list)
- if isinstance(metadata, str) or isinstance(metadata, unicode):
+ if isinstance(metadata, string_types):
return metadata.replace(';', replace_str)
elif isinstance(metadata, list):
- transtab = maketrans(';|', ''.join([replace_str, replace_str]))
+ transtab = bytes.maketrans(';|', ''.join([replace_str, replace_str]))
# metadata is list of lists
if isinstance(metadata[0], list):
new_metadata = []
diff --git a/biom/table.py b/biom/table.py
index 6ecba32..8caac29 100644
--- a/biom/table.py
+++ b/biom/table.py
@@ -179,17 +179,18 @@ from datetime import datetime
from json import dumps
from functools import reduce
from operator import itemgetter, add
-from itertools import izip
+from future.builtins import zip
+from future.utils import viewitems
from collections import defaultdict, Hashable, Iterable
from numpy import ndarray, asarray, zeros, newaxis
from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, isspmatrix, vstack
+from future.utils import string_types
from biom.exception import TableException, UnknownAxisError, UnknownIDError
from biom.util import (get_biom_format_version_string,
get_biom_format_url_string, flatten, natsort,
prefer_self, index_list, H5PY_VLEN_STR, HAVE_H5PY,
- H5PY_VLEN_UNICODE)
-from biom import __format_version__
+ __format_version__)
from biom.err import errcheck
from ._filter import _filter
from ._transform import _transform
@@ -208,8 +209,78 @@ __maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald at colorado.edu"
-MATRIX_ELEMENT_TYPE = {'int': int, 'float': float, 'unicode': unicode,
- u'int': int, u'float': float, u'unicode': unicode}
+MATRIX_ELEMENT_TYPE = {'int': int, 'float': float, 'unicode': str,
+ u'int': int, u'float': float, u'unicode': str}
+
+
+def general_parser(x):
+ return x
+
+
+def vlen_list_of_str_parser(value):
+ """Parses the taxonomy value"""
+ new_value = []
+ for v in value:
+ if v:
+ if isinstance(v, bytes):
+ v = v.decode('utf8')
+ new_value.append(v)
+
+ return new_value if new_value else None
+
+
+def general_formatter(grp, header, md, compression):
+ """Creates a dataset for a general atomic type category"""
+ test_val = md[0][header]
+ shape = (len(md),)
+ name = 'metadata/%s' % header
+ if isinstance(test_val, string_types):
+ grp.create_dataset(name, shape=shape,
+ dtype=H5PY_VLEN_STR,
+ data=[m[header].encode('utf8') for m in md],
+ compression=compression)
+ else:
+ grp.create_dataset(
+ 'metadata/%s' % header, shape=(len(md),),
+ data=[m[header] for m in md],
+ compression=compression)
+
+
+def vlen_list_of_str_formatter(grp, header, md, compression):
+ """Creates a (N, ?) vlen str dataset"""
+ # It is possible that the value for some sample/observation
+ # is None. In that case, we still need to see them as
+ # iterables, but their length will be 0
+ iterable_checks = []
+ lengths = []
+ for m in md:
+ if m[header] is None:
+ iterable_checks.append(True)
+ else:
+ iterable_checks.append(
+ isinstance(m.get(header, []), Iterable))
+ lengths.append(len(m[header]))
+
+ if not np.all(iterable_checks):
+ raise TypeError(
+ "Category %s not formatted correctly. Did you pass"
+ " --process-obs-metadata taxonomy when converting "
+ " from tsv?")
+
+ max_list_len = max(lengths)
+ shape = (len(md), max_list_len)
+ data = np.empty(shape, dtype=object)
+ for i, m in enumerate(md):
+ if m[header] is None:
+ continue
+ value = np.asarray(m[header])
+ data[i, :len(value)] = [v.encode('utf8') for v in value]
+ # Change the None entries on data to empty strings ""
+ data = np.where(data == np.array(None), "", data)
+ grp.create_dataset(
+ 'metadata/%s' % header, shape=shape,
+ dtype=H5PY_VLEN_STR, data=data,
+ compression=compression)
class Table(object):
@@ -447,6 +518,32 @@ class Table(object):
"""The sparse matrix object"""
return self._data
+ def length(self, axis='sample'):
+ """Return the length of an axis
+
+ Parameters
+ ----------
+ axis : {'sample', 'observation'}, optional
+ The axis to operate on
+
+ Raises
+ ------
+ UnknownAxisError
+ If provided an unrecognized axis.
+
+ Examples
+ --------
+ >>> from biom import example_table
+ >>> print example_table.length(axis='sample')
+ 3
+ >>> print example_table.length(axis='observation')
+ 2
+ """
+ if axis not in ('sample', 'observation'):
+ raise UnknownAxisError(axis)
+
+ return self.shape[1] if axis == 'sample' else self.shape[0]
+
def add_group_metadata(self, group_md, axis='sample'):
"""Take a dict of group metadata and add it to an axis
@@ -487,7 +584,7 @@ class Table(object):
"""
metadata = self.metadata(axis=axis)
if metadata is not None:
- for id_, md_entry in md.iteritems():
+ for id_, md_entry in viewitems(md):
if self.exists(id_, axis=axis):
idx = self.index(id_, axis=axis)
metadata[idx].update(md_entry)
@@ -741,6 +838,65 @@ class Table(object):
self.ids()[:], self.ids(axis='observation')[:],
sample_md_copy, obs_md_copy, self.table_id)
+ def head(self, n=5, m=5):
+ """Get the first n rows and m columns from self
+
+ Parameters
+ ----------
+ n : int, optional
+ The number of rows (observations) to get. This number must be
+ greater than 0. If not specified, 5 rows will be retrieved.
+
+ m : int, optional
+ The number of columns (samples) to get. This number must be
+ greater than 0. If not specified, 5 columns will be
+ retrieved.
+
+ Notes
+ -----
+ Like `head` for Linux-like systems, requesting more rows (or columns)
+ than exists will silently work.
+
+ Raises
+ ------
+ IndexError
+ If `n` or `m` are <= 0.
+
+ Returns
+ -------
+ Table
+ The subset table.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from biom.table import Table
+ >>> data = np.arange(100).reshape(5, 20)
+ >>> obs_ids = ['O%d' % i for i in range(1, 6)]
+ >>> samp_ids = ['S%d' % i for i in range(1, 21)]
+ >>> table = Table(data, obs_ids, samp_ids)
+ >>> print table.head() # doctest: +NORMALIZE_WHITESPACE
+ # Constructed from biom file
+ #OTU ID S1 S2 S3 S4 S5
+ O1 0.0 1.0 2.0 3.0 4.0
+ O2 20.0 21.0 22.0 23.0 24.0
+ O3 40.0 41.0 42.0 43.0 44.0
+ O4 60.0 61.0 62.0 63.0 64.0
+ O5 80.0 81.0 82.0 83.0 84.0
+
+ """
+ if n <= 0:
+ raise IndexError("n cannot be <= 0.")
+
+ if m <= 0:
+ raise IndexError("m cannot be <= 0.")
+
+ row_ids = self.ids(axis='observation')[:n]
+ col_ids = self.ids(axis='sample')[:m]
+
+ table = self.filter(row_ids, axis='observation', inplace=False)
+ return table.filter(col_ids, axis='sample')
+
def group_metadata(self, axis='sample'):
"""Return the group metadata of the given axis
@@ -795,7 +951,7 @@ class Table(object):
Parameters
----------
axis : {'sample', 'observation'}, optional
- Axis to search for `id`. Defaults to 'sample'
+ Axis to return ids from. Defaults to 'sample'
Returns
-------
@@ -834,6 +990,86 @@ class Table(object):
else:
raise UnknownAxisError(axis)
+ def update_ids(self, id_map, axis='sample', strict=True, inplace=True):
+ """Update the ids along the given axis
+
+ Parameters
+ ----------
+ id_map : dict
+ Mapping of old to new ids
+ axis : {'sample', 'observation'}, optional
+ Axis to search for `id`. Defaults to 'sample'
+ strict : bool, optional
+ If ``True``, raise an error if an id is present in the given axis
+ but is not a key in ``id_map``. If False, retain old identifier
+ for ids that are present in the given axis but are not keys in
+ ``id_map``.
+ inplace : bool, optional
+ If ``True`` the ids are updated in ``self``; if ``False`` the ids
+ are updated in a new table is returned.
+
+ Returns
+ -------
+ Table
+ Table object where ids have been updated.
+
+ Raises
+ ------
+ UnknownAxisError
+ If provided an unrecognized axis.
+ TableException
+ If an id from ``self`` is not in ``id_map`` and ``strict`` is
+ ``True``.
+
+ Examples
+ --------
+ Create a 2x3 BIOM table:
+
+ >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
+ >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])
+
+ Define a mapping of old to new sample ids:
+
+ >>> id_map = {'S1':'s1.1', 'S2':'s2.2', 'S3':'s3.3'}
+
+ Get the ids along the sample axis in the table:
+
+ >>> print table.ids(axis='sample')
+ ['S1' 'S2' 'S3']
+
+ Update the sample ids and get the ids along the sample axis in the
+ updated table:
+
+ >>> updated_table = table.update_ids(id_map, axis='sample')
+ >>> print updated_table.ids(axis='sample')
+ ['s1.1' 's2.2' 's3.3']
+ """
+ updated_ids = zeros(self.ids(axis=axis).size, dtype=object)
+ for idx, old_id in enumerate(self.ids(axis=axis)):
+ if strict and old_id not in id_map:
+ raise TableException(
+ "Mapping not provided for %s identifier: %s. If this "
+ "identifier should not be updated, pass strict=False."
+ % (axis, old_id))
+
+ updated_ids[idx] = id_map.get(old_id, old_id)
+
+ # prepare the result object and update the ids along the specified
+ # axis
+ result = self if inplace else self.copy()
+ if axis == 'sample':
+ result._sample_ids = updated_ids
+ else:
+ result._observation_ids = updated_ids
+
+ result._index_ids()
+
+ # check for errors (specifically, we want to ensure that duplicate
+ # ids haven't been introduced)
+ errcheck(result)
+
+ return result
+
def _get_sparse_data(self, axis='sample'):
"""Returns the internal data in the correct sparse representation
@@ -978,6 +1214,25 @@ class Table(object):
-------
float
The data value corresponding to the specified matrix position
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from biom.table import Table
+
+ Create a 2x3 BIOM table:
+
+ >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
+ >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'Z3'])
+
+ Retrieve the number of counts for observation `O2` in sample `Z3`.
+
+ >>> print table.get_value_by_ids('O2', 'Z3')
+ 42.0
+
+ See Also
+ --------
+ Table.data
"""
return self[self.index(obs_id, 'observation'),
self.index(samp_id, 'sample')]
@@ -1045,9 +1300,9 @@ class Table(object):
"""
return id in self._index(axis=axis)
- def delimited_self(self, delim='\t', header_key=None, header_value=None,
+ def delimited_self(self, delim=u'\t', header_key=None, header_value=None,
metadata_formatter=str,
- observation_column_name='#OTU ID'):
+ observation_column_name=u'#OTU ID'):
"""Return self as a string in a delimited form
Default str output for the Table is just row/col ids and table data
@@ -1071,11 +1326,16 @@ class Table(object):
OTU1\t10\t2
OTU2\t4\t8
"""
+ def to_utf8(i):
+ if isinstance(i, bytes):
+ return i.decode('utf8')
+ else:
+ return str(i)
+
if self.is_empty():
raise TableException("Cannot delimit self if I don't have data...")
- samp_ids = delim.join(map(str, self.ids()))
-
+ samp_ids = delim.join([to_utf8(i) for i in self.ids()])
# 17 hrs of straight programming later...
if header_key is not None:
if header_value is None:
@@ -1088,26 +1348,25 @@ class Table(object):
"You need to specify both header_key and header_value")
if header_value:
- output = ['# Constructed from biom file',
- '%s%s%s\t%s' % (observation_column_name, delim, samp_ids,
- header_value)]
+ output = [u'# Constructed from biom file',
+ u'%s%s%s\t%s' % (observation_column_name, delim,
+ samp_ids, header_value)]
else:
output = ['# Constructed from biom file',
'%s%s%s' % (observation_column_name, delim, samp_ids)]
-
obs_metadata = self.metadata(axis='observation')
for obs_id, obs_values in zip(self.ids(axis='observation'),
self._iter_obs()):
str_obs_vals = delim.join(map(str, self._to_dense(obs_values)))
-
+ obs_id = to_utf8(obs_id)
if header_key and obs_metadata is not None:
md = obs_metadata[self._obs_index[obs_id]]
md_out = metadata_formatter(md.get(header_key, None))
output.append(
- '%s%s%s\t%s' %
+ u'%s%s%s\t%s' %
(obs_id, delim, str_obs_vals, md_out))
else:
- output.append('%s%s%s' % (obs_id, delim, str_obs_vals))
+ output.append(u'%s%s%s' % (obs_id, delim, str_obs_vals))
return '\n'.join(output)
@@ -1261,6 +1520,10 @@ class Table(object):
>>> example_table.data('S1', axis='sample')
array([ 0., 3.])
+ See Also
+ --------
+ Table.get_value_by_ids
+
"""
if axis == 'sample':
data = self[:, self.index(id, 'sample')]
@@ -1391,7 +1654,7 @@ class Table(object):
iter_ = self.iter_data(axis=axis, dense=dense)
- return izip(iter_, ids, metadata)
+ return zip(iter_, ids, metadata)
def iter_pairwise(self, dense=True, axis='sample', tri=True, diag=False):
"""Pairwise iteration over self
@@ -1458,9 +1721,11 @@ class Table(object):
diag_v = 1 - diag # for offseting tri_f, where a 0 includes the diag
if tri:
- tri_f = lambda idx: ind[idx+diag_v:]
+ def tri_f(idx):
+ return ind[idx+diag_v:]
else:
- tri_f = lambda idx: np.hstack([ind[:idx], ind[idx+diag_v:]])
+ def tri_f(idx):
+ return np.hstack([ind[:idx], ind[idx+diag_v:]])
for idx, i in enumerate(ind):
id_i = ids[i]
@@ -1739,7 +2004,7 @@ class Table(object):
table._observation_metadata = metadata
table._index_ids()
- errcheck(table, 'empty')
+ errcheck(table)
return table
@@ -1812,7 +2077,7 @@ class Table(object):
md = self.metadata(axis=self._invert_axis(axis))
- for part, (ids, values, metadata) in partitions.iteritems():
+ for part, (ids, values, metadata) in viewitems(partitions):
if axis == 'sample':
data = self._conv_to_self_type(values, transpose=True)
samp_ids = ids
@@ -1830,7 +2095,7 @@ class Table(object):
yield part, Table(data, obs_ids, samp_ids, obs_md, samp_md,
self.table_id, type=self.type)
- def collapse(self, f, reduce_f=add, norm=True, min_group_size=1,
+ def collapse(self, f, collapse_f=None, norm=True, min_group_size=1,
include_collapsed_metadata=True, one_to_many=False,
one_to_many_mode='add', one_to_many_md_key='Path',
strict=False, axis='sample'):
@@ -1884,7 +2149,7 @@ class Table(object):
`one_to_many` and `norm` are not supported together.
- `one_to_many` and `reduce_f` are not supported together.
+ `one_to_many` and `collapse_f` are not supported together.
`one_to_many` and `min_group_size` are not supported together.
@@ -1896,9 +2161,14 @@ class Table(object):
f : function
Function that is used to determine what partition a vector belongs
to
- reduce_f : function, optional
- Defaults to ``operator.add``. Function that reduces two vectors in
- a one-to-one collapse
+ collapse_f : function, optional
+ Function that collapses a partition in a one-to-one collapse. The
+ expected function signature is:
+
+ dense or sparse_vector <- collapse_f(Table, axis)
+
+ Defaults to a pairwise add.
+
norm : bool, optional
Defaults to ``True``. If ``True``, normalize the resulting table
min_group_size : int, optional
@@ -1975,15 +2245,27 @@ class Table(object):
# transpose is only necessary in the one-to-one case
# new_data_shape is only necessary in the one-to-many case
# axis_slice is only necessary in the one-to-many case
- axis_ids_md = lambda t: (t.ids(axis=axis), t.metadata(axis=axis))
+ def axis_ids_md(t):
+ return (t.ids(axis=axis), t.metadata(axis=axis))
+
if axis == 'sample':
transpose = True
- new_data_shape = lambda ids, collapsed: (len(ids), len(collapsed))
- axis_slice = lambda lookup, key: (slice(None), lookup[key])
+
+ def new_data_shape(ids, collapsed):
+ return (len(ids), len(collapsed))
+
+ def axis_slice(lookup, key):
+ return (slice(None), lookup[key])
+
elif axis == 'observation':
transpose = False
- new_data_shape = lambda ids, collapsed: (len(collapsed), len(ids))
- axis_slice = lambda lookup, key: (lookup[key], slice(None))
+
+ def new_data_shape(ids, collapsed):
+ return (len(collapsed), len(ids))
+
+ def axis_slice(lookup, key):
+ return (lookup[key], slice(None))
+
else:
raise UnknownAxisError(axis)
@@ -1997,12 +2279,12 @@ class Table(object):
new_md = {}
md_count = {}
- for id_, md in izip(*axis_ids_md(self)):
+ for id_, md in zip(*axis_ids_md(self)):
md_iter = f(id_, md)
num_md = 0
while True:
try:
- pathway, partition = md_iter.next()
+ pathway, partition = next(md_iter)
except IndexError:
# if a pathway is incomplete
if strict:
@@ -2027,7 +2309,8 @@ class Table(object):
# evenly.
dtype = float if one_to_many_mode == 'divide' else self.dtype
- new_data = zeros(new_data_shape(axis_ids_md(self)[0], new_md),
+ new_data = zeros(new_data_shape(self.ids(self._invert_axis(axis)),
+ new_md),
dtype=dtype)
# for each vector
@@ -2038,7 +2321,7 @@ class Table(object):
while True:
try:
- pathway, part = md_iter.next()
+ pathway, part = next(md_iter)
except IndexError:
# if a pathway is incomplete
if strict:
@@ -2051,7 +2334,6 @@ class Table(object):
continue
except StopIteration:
break
-
if one_to_many_mode == 'add':
new_data[axis_slice(idx_lookup, part)] += vals
else:
@@ -2060,23 +2342,27 @@ class Table(object):
if include_collapsed_metadata:
# reassociate pathway information
- for k, i in sorted(idx_lookup.iteritems(), key=itemgetter(1)):
+ for k, i in sorted(viewitems(idx_lookup), key=itemgetter(1)):
collapsed_md.append({one_to_many_md_key: new_md[k]})
# get the new sample IDs
- collapsed_ids = [k for k, i in sorted(idx_lookup.iteritems(),
+ collapsed_ids = [k for k, i in sorted(viewitems(idx_lookup),
key=itemgetter(1))]
# convert back to self type
data = self._conv_to_self_type(new_data)
else:
+ if collapse_f is None:
+ def collapse_f(t, axis):
+ return t.reduce(add, axis)
+
for part, table in self.partition(f, axis=axis):
axis_ids, axis_md = axis_ids_md(table)
if len(axis_ids) < min_group_size:
continue
- redux_data = table.reduce(reduce_f, self._invert_axis(axis))
+ redux_data = collapse_f(table, self._invert_axis(axis))
if norm:
redux_data /= len(axis_ids)
@@ -2277,22 +2563,17 @@ class Table(object):
if n < 0:
raise ValueError("n cannot be negative.")
- ids = self.ids(axis=axis)
- data = self._get_sparse_data(axis=axis)
+ table = self.copy()
if by_id:
- ids = ids.copy()
+ ids = table.ids(axis=axis).copy()
np.random.shuffle(ids)
subset = set(ids[:n])
- table = self.filter(lambda v, i, md: i in subset, inplace=False)
+ table.filter(lambda v, i, md: i in subset)
else:
+ data = table._get_sparse_data()
_subsample(data, n)
-
- samp_md = deepcopy(self.metadata())
- obs_md = deepcopy(self.metadata(axis='observation'))
-
- table = Table(data, self.ids(axis='observation').copy(),
- self.ids().copy(), obs_md, samp_md)
+ table._data = data
table.filter(lambda v, i, md: v.sum() > 0, axis=axis)
@@ -2521,11 +2802,19 @@ class Table(object):
generator
Yields ``(observation_id, sample_id)`` for each nonzero element
"""
- # this is naively implemented. If performance is a concern, private
- # methods can be written to hit against the underlying types directly
- for o_idx, samp_vals in enumerate(self.iter_data(axis="observation")):
- for s_idx in samp_vals.nonzero()[0]:
- yield (self.ids(axis='observation')[o_idx], self.ids()[s_idx])
+ csr = self._data.tocsr()
+ samp_ids = self.ids()
+ obs_ids = self.ids(axis='observation')
+
+ indptr = csr.indptr
+ indices = csr.indices
+ for row_idx in range(indptr.size - 1):
+ start = indptr[row_idx]
+ end = indptr[row_idx+1]
+
+ obs_id = obs_ids[row_idx]
+ for col_idx in indices[start:end]:
+ yield (obs_id, samp_ids[col_idx])
def nonzero_counts(self, axis, binary=False):
"""Get nonzero summaries about an axis
@@ -2545,10 +2834,14 @@ class Table(object):
"""
if binary:
dtype = 'int'
- op = lambda x: x.nonzero()[0].size
+
+ def op(x):
+ return x.nonzero()[0].size
else:
dtype = self.dtype
- op = lambda x: x.sum()
+
+ def op(x):
+ return x.sum()
if axis in ('sample', 'observation'):
# can use np.bincount for CSMat or ScipySparse
@@ -2817,7 +3110,7 @@ class Table(object):
sample_ids[:], obs_md, sample_md)
@classmethod
- def from_hdf5(cls, h5grp, ids=None, axis='sample'):
+ def from_hdf5(cls, h5grp, ids=None, axis='sample', parse_fs=None):
"""Parse an HDF5 formatted BIOM table
If ids is provided, only the samples/observations listed in ids
@@ -2908,6 +3201,12 @@ dataset of int32
to retrieve from the hdf5 biom table
axis : {'sample', 'observation'}, optional
The axis to subset on
+ parse_fs : dict, optional
+ Specify custom parsing functions for metadata fields. This dict is
+ expected to be {'metadata_field': function}, where the function
+ signature is (object) corresponding to a single row in the
+ associated metadata dataset. The return from this function an
+ object as well, and is the parsed representation of the metadata.
Returns
-------
@@ -2957,6 +3256,9 @@ html
if axis not in ['sample', 'observation']:
raise UnknownAxisError(axis)
+ if parse_fs is None:
+ parse_fs = {}
+
id_ = h5grp.attrs['id']
create_date = h5grp.attrs['creation-date']
generated_by = h5grp.attrs['generated-by']
@@ -2969,25 +3271,26 @@ html
# fetch all of the IDs
ids = grp['ids'][:]
- # define functions for parsing the hdf5 metadata
- general_parser = lambda x: x
-
- def vlen_list_of_str_parser(value):
- """Parses the taxonomy value"""
- # Remove the empty string values and return the results as list
- new_value = value[np.where(
- value == np.array(""), False, True)].tolist()
- return new_value if new_value else None
+ if ids.size > 0 and isinstance(ids[0], bytes):
+ ids = np.array([i.decode('utf8') for i in ids])
parser = defaultdict(lambda: general_parser)
parser['taxonomy'] = vlen_list_of_str_parser
parser['KEGG_Pathways'] = vlen_list_of_str_parser
parser['collapsed_ids'] = vlen_list_of_str_parser
- # fetch all of the metadata
- md = []
- for i in range(len(ids)):
- md.append({cat: parser[cat](vals[i])
- for cat, vals in grp['metadata'].items()})
+ parser.update(parse_fs)
+
+ # fetch ID specific metadata
+ md = [{} for i in range(len(ids))]
+ for category, dset in viewitems(grp['metadata']):
+ parse_f = parser[category]
+ data = dset[:]
+ for md_dict, data_row in zip(md, data):
+ md_dict[category] = parse_f(data_row)
+
+ # If there was no metadata on the axis, set it up as none
+ md = md if any(md) else None
+
# Fetch the group metadata
grp_md = {cat: val
for cat, val in grp['group-metadata'].items()}
@@ -3019,6 +3322,7 @@ html
# Retrieve only the ids that we are interested on
ids = source_ids[idx]
# Check that all desired ids have been found on source ids
+
if ids.shape != desired_ids.shape:
raise ValueError("The following ids could not be "
"found in the biom table: %s" %
@@ -3079,13 +3383,18 @@ html
observation_group_metadata=obs_grp_md,
sample_group_metadata=samp_grp_md)
- f = lambda vals, id_, md: np.any(vals)
- axis = 'observation' if axis == 'sample' else 'sample'
- t.filter(f, axis=axis)
+ if ids is not None:
+ # filter out any empty samples or observations which may exist due
+ # to subsetting
+ def any_value(vals, id_, md):
+ return np.any(vals)
+
+ axis = 'observation' if axis == 'sample' else 'sample'
+ t.filter(any_value, axis=axis)
return t
- def to_hdf5(self, h5grp, generated_by, compress=True):
+ def to_hdf5(self, h5grp, generated_by, compress=True, format_fs=None):
"""Store CSC and CSR in place
The resulting structure of this group is below. A few basic
@@ -3174,6 +3483,18 @@ dataset of int32
compress : bool, optional
Defaults to ``True`` means fields will be compressed with gzip,
``False`` means no compression
+ format_fs : dict, optional
+ Specify custom formatting functions for metadata fields. This dict
+ is expected to be {'metadata_field': function}, where the function
+ signature is (h5py.Group, str, dict, bool) corresponding to the
+ specific HDF5 group the metadata dataset will be associated with,
+ the category being operated on, the metadata for the entire axis
+ being operated on, and whether to enable compression on the
+ dataset. Anything returned by this function is ignored.
+
+ Notes
+ -----
+ This method does not return anything and operates in place on h5grp.
See Also
--------
@@ -3185,7 +3506,7 @@ dataset of int32
py.sparse.csr_matrix.html
.. [2] http://docs.scipy.org/doc/scipy-0.13.0/reference/generated/sci\
py.sparse.csc_matrix.html
- .. [3] http://biom-format.org/documentation/format_versions/biom-2.0.\
+ .. [3] http://biom-format.org/documentation/format_versions/biom-2.1.\
html
Examples
@@ -3202,6 +3523,9 @@ html
raise RuntimeError("h5py is not in the environment, HDF5 support "
"is not available")
+ if format_fs is None:
+ format_fs = {}
+
def axis_dump(grp, ids, md, group_md, order, compression=None):
"""Store for an axis"""
self._data = self._data.asformat(order)
@@ -3230,7 +3554,7 @@ html
# is cleaner, as is the parse
grp.create_dataset('ids', shape=(len_ids,),
dtype=H5PY_VLEN_STR,
- data=[str(i) for i in ids],
+ data=[i.encode('utf8') for i in ids],
compression=compression)
else:
# Empty H5PY_VLEN_STR datasets are not supported.
@@ -3241,68 +3565,11 @@ html
grp.create_group('metadata')
if md is not None:
- # Define functions for writing to hdf5
- def general_formatter(grp, header, md, compression):
- """Creates a dataset for a general atomic type category"""
- test_val = md[0][header]
- shape = (len(md),)
- name = 'metadata/%s' % category
- if isinstance(test_val, unicode):
- grp.create_dataset(name, shape=shape,
- dtype=H5PY_VLEN_UNICODE,
- compression=compression)
- grp[name][:] = [m[header] for m in md]
- elif isinstance(test_val, str):
- grp.create_dataset(name, shape=shape,
- dtype=H5PY_VLEN_STR,
- data=[m[header] for m in md],
- compression=compression)
- else:
- grp.create_dataset(
- 'metadata/%s' % category, shape=(len(md),),
- data=[m[header] for m in md],
- compression=compression)
-
- def vlen_list_of_str_formatter(grp, header, md, compression):
- """Creates a (N, ?) vlen str dataset"""
- # It is possible that the value for some sample/observation
- # is None. In that case, we still need to see them as
- # iterables, but their length will be 0
- iterable_checks = []
- lengths = []
- for m in md:
- if m[header] is None:
- iterable_checks.append(True)
- else:
- iterable_checks.append(
- isinstance(m.get(header, []), Iterable))
- lengths.append(len(m[header]))
-
- if not np.all(iterable_checks):
- raise TypeError(
- "Category %s not formatted correctly. Did you pass"
- " --process-obs-metadata taxonomy when converting "
- " from tsv?")
-
- max_list_len = max(lengths)
- shape = (len(md), max_list_len)
- data = np.empty(shape, dtype=object)
- for i, m in enumerate(md):
- if m[header] is None:
- continue
- value = np.asarray(m[header])
- data[i, :len(value)] = value
- # Change the None entries on data to empty strings ""
- data = np.where(data == np.array(None), "", data)
- grp.create_dataset(
- 'metadata/%s' % header, shape=shape,
- dtype=H5PY_VLEN_STR, data=data,
- compression=compression)
-
formatter = defaultdict(lambda: general_formatter)
formatter['taxonomy'] = vlen_list_of_str_formatter
formatter['KEGG_Pathways'] = vlen_list_of_str_formatter
formatter['collapsed_ids'] = vlen_list_of_str_formatter
+ formatter.update(format_fs)
# Loop through all the categories
for category in md[0]:
# Create the dataset for the current category,
@@ -3444,29 +3711,28 @@ html
str
A JSON-formatted string representing the biom table
"""
- if (not isinstance(generated_by, str) and
- not isinstance(generated_by, unicode)):
+ if not isinstance(generated_by, string_types):
raise TableException("Must specify a generated_by string")
# Fill in top-level metadata.
if direct_io:
- direct_io.write('{')
- direct_io.write('"id": "%s",' % str(self.table_id))
+ direct_io.write(u'{')
+ direct_io.write(u'"id": "%s",' % str(self.table_id))
direct_io.write(
- '"format": "%s",' %
- get_biom_format_version_string(self.format_version))
+ u'"format": "%s",' %
+ get_biom_format_version_string((1, 0))) # JSON table -> 1.0.0
direct_io.write(
- '"format_url": "%s",' %
+ u'"format_url": "%s",' %
get_biom_format_url_string())
- direct_io.write('"generated_by": "%s",' % generated_by)
- direct_io.write('"date": "%s",' % datetime.now().isoformat())
+ direct_io.write(u'"generated_by": "%s",' % generated_by)
+ direct_io.write(u'"date": "%s",' % datetime.now().isoformat())
else:
- id_ = '"id": "%s",' % str(self.table_id)
- format_ = '"format": "%s",' % get_biom_format_version_string(
- self.format_version)
- format_url = '"format_url": "%s",' % get_biom_format_url_string()
- generated_by = '"generated_by": "%s",' % generated_by
- date = '"date": "%s",' % datetime.now().isoformat()
+ id_ = u'"id": "%s",' % str(self.table_id)
+ format_ = u'"format": "%s",' % get_biom_format_version_string(
+ (1, 0)) # JSON table -> 1.0.0
+ format_url = u'"format_url": "%s",' % get_biom_format_url_string()
+ generated_by = u'"generated_by": "%s",' % generated_by
+ date = u'"date": "%s",' % datetime.now().isoformat()
# Determine if we have any data in the matrix, and what the shape of
# the matrix is.
@@ -3484,30 +3750,30 @@ html
# Determine the type of elements the matrix is storing.
if isinstance(test_element, int):
- matrix_element_type = "int"
+ matrix_element_type = u"int"
elif isinstance(test_element, float):
- matrix_element_type = "float"
- elif isinstance(test_element, unicode):
- matrix_element_type = "unicode"
+ matrix_element_type = u"float"
+ elif isinstance(test_element, string_types):
+ matrix_element_type = u"str"
else:
raise TableException("Unsupported matrix data type.")
# Fill in details about the matrix.
if direct_io:
direct_io.write(
- '"matrix_element_type": "%s",' %
+ u'"matrix_element_type": "%s",' %
matrix_element_type)
- direct_io.write('"shape": [%d, %d],' % (num_rows, num_cols))
+ direct_io.write(u'"shape": [%d, %d],' % (num_rows, num_cols))
else:
- matrix_element_type = '"matrix_element_type": "%s",' % \
+ matrix_element_type = u'"matrix_element_type": "%s",' % \
matrix_element_type
- shape = '"shape": [%d, %d],' % (num_rows, num_cols)
+ shape = u'"shape": [%d, %d],' % (num_rows, num_cols)
# Fill in the table type
if self.type is None:
- type_ = '"type": null,'
+ type_ = u'"type": null,'
else:
- type_ = '"type": "%s",' % self.type
+ type_ = u'"type": "%s",' % self.type
if direct_io:
direct_io.write(type_)
@@ -3515,24 +3781,24 @@ html
# Fill in details about the rows in the table and fill in the matrix's
# data. BIOM 2.0+ is now only sparse
if direct_io:
- direct_io.write('"matrix_type": "sparse",')
- direct_io.write('"data": [')
+ direct_io.write(u'"matrix_type": "sparse",')
+ direct_io.write(u'"data": [')
else:
- matrix_type = '"matrix_type": "sparse",'
- data = ['"data": [']
+ matrix_type = u'"matrix_type": "sparse",'
+ data = [u'"data": [']
max_row_idx = len(self.ids(axis='observation')) - 1
max_col_idx = len(self.ids()) - 1
- rows = ['"rows": [']
+ rows = [u'"rows": [']
have_written = False
for obs_index, obs in enumerate(self.iter(axis='observation')):
# i'm crying on the inside
if obs_index != max_row_idx:
- rows.append('{"id": %s, "metadata": %s},' % (dumps(obs[1]),
- dumps(obs[2])))
- else:
- rows.append('{"id": %s, "metadata": %s}],' % (dumps(obs[1]),
+ rows.append(u'{"id": %s, "metadata": %s},' % (dumps(obs[1]),
dumps(obs[2])))
+ else:
+ rows.append(u'{"id": %s, "metadata": %s}],' % (dumps(obs[1]),
+ dumps(obs[2])))
# turns out its a pain to figure out when to place commas. the
# simple work around, at the expense of a little memory
@@ -3541,50 +3807,55 @@ html
built_row = []
for col_index, val in enumerate(obs[0]):
if float(val) != 0.0:
- built_row.append("[%d,%d,%r]" % (obs_index, col_index,
- val))
+ built_row.append(u"[%d,%d,%r]" % (obs_index, col_index,
+ val))
if built_row:
# if we have written a row already, its safe to add a comma
if have_written:
if direct_io:
- direct_io.write(',')
+ direct_io.write(u',')
else:
- data.append(',')
+ data.append(u',')
if direct_io:
- direct_io.write(','.join(built_row))
+ direct_io.write(u','.join(built_row))
else:
- data.append(','.join(built_row))
+ data.append(u','.join(built_row))
have_written = True
# finalize the data block
if direct_io:
- direct_io.write("],")
+ direct_io.write(u"],")
else:
- data.append("],")
+ data.append(u"],")
# Fill in details about the columns in the table.
- columns = ['"columns": [']
+ columns = [u'"columns": [']
for samp_index, samp in enumerate(self.iter()):
if samp_index != max_col_idx:
- columns.append('{"id": %s, "metadata": %s},' % (
+ columns.append(u'{"id": %s, "metadata": %s},' % (
dumps(samp[1]), dumps(samp[2])))
else:
- columns.append('{"id": %s, "metadata": %s}]' % (
+ columns.append(u'{"id": %s, "metadata": %s}]' % (
dumps(samp[1]), dumps(samp[2])))
- rows = ''.join(rows)
- columns = ''.join(columns)
+ if rows[0] == u'"rows": [' and len(rows) == 1:
+ # empty table case
+ rows = [u'"rows": [],']
+ columns = [u'"columns": []']
+
+ rows = u''.join(rows)
+ columns = u''.join(columns)
if direct_io:
direct_io.write(rows)
direct_io.write(columns)
- direct_io.write('}')
+ direct_io.write(u'}')
else:
- return "{%s}" % ''.join([id_, format_, format_url, matrix_type,
- generated_by, date, type_,
- matrix_element_type, shape,
- ''.join(data), rows, columns])
+ return u"{%s}" % ''.join([id_, format_, format_url, matrix_type,
+ generated_by, date, type_,
+ matrix_element_type, shape,
+ u''.join(data), rows, columns])
@staticmethod
def from_tsv(lines, obs_mapping, sample_mapping,
@@ -3640,8 +3911,7 @@ html
return Table(data, obs_ids, sample_ids, obs_metadata, sample_metadata)
@staticmethod
- def _extract_data_from_tsv(lines, delim='\t', dtype=float,
- header_mark=None, md_parse=None):
+ def _extract_data_from_tsv(lines, delim='\t', dtype=float, md_parse=None):
"""Parse a classic table into (sample_ids, obs_ids, data, metadata,
name)
@@ -3652,8 +3922,6 @@ html
delim: string
delimiter in file lines
dtype: type
- header_mark: string or None
- string that indicates start of header line
md_parse: function or None
function used to parse metadata
@@ -3684,34 +3952,40 @@ html
"""
if not isinstance(lines, list):
try:
- lines = lines.readlines()
+ hasattr(lines, 'seek')
except AttributeError:
raise RuntimeError(
- "Input needs to support readlines or be indexable")
+ "Input needs to support seek or be indexable")
# find header, the first line that is not empty and does not start
# with a #
- for idx, l in enumerate(lines):
- if not l.strip():
+ header = False
+ list_index = 0
+ for line in lines:
+ if not line.strip():
continue
- if not l.startswith('#'):
- break
- if header_mark and l.startswith(header_mark):
+ if not line.startswith('#'):
+ # Covers the case where the first line is the header
+ # and there is no indication of it (no comment character)
+ if not header:
+ header = line.strip().split(delim)[1:]
+ data_start = list_index + 1
+ else:
+ data_start = list_index
break
-
- if idx == 0:
- data_start = 1
- header = lines[0].strip().split(delim)[1:]
+ list_index += 1
+ header = line.strip().split(delim)[1:]
+ # If the first line is the header, then we need to get the next
+ # line for the "last column" check
+ if isinstance(lines, list):
+ line = lines[data_start]
else:
- if header_mark is not None:
- data_start = idx + 1
- header = lines[idx].strip().split(delim)[1:]
- else:
- data_start = idx
- header = lines[idx - 1].strip().split(delim)[1:]
+ lines.seek(0)
+ for index in range(0, data_start + 1):
+ line = lines.readline()
# attempt to determine if the last column is non-numeric, ie, metadata
- first_values = lines[data_start].strip().split(delim)
+ first_values = line.strip().split(delim)
last_value = first_values[-1]
last_column_is_numeric = True
@@ -3738,7 +4012,17 @@ html
data = []
obs_ids = []
- for line in lines[data_start:]:
+ row_number = 0
+
+ # Go back to the beginning if it is a file:
+ if hasattr(lines, 'seek'):
+ lines.seek(0)
+ for index in range(0, data_start):
+ line = lines.readline()
+ else:
+ lines = lines[data_start:]
+
+ for line in lines:
line = line.strip()
if not line:
continue
@@ -3749,18 +4033,20 @@ html
obs_ids.append(fields[0])
if last_column_is_numeric:
- values = map(dtype, fields[1:])
+ values = list(map(dtype, fields[1:]))
else:
- values = map(dtype, fields[1:-1])
+ values = list(map(dtype, fields[1:-1]))
if md_parse is not None:
metadata.append(md_parse(fields[-1]))
else:
metadata.append(fields[-1])
-
- data.append(values)
-
- return samp_ids, obs_ids, asarray(data), metadata, md_name
+ for column_number in range(0, len(values)):
+ if values[column_number] != dtype(0):
+ data.append([row_number, column_number,
+ values[column_number]])
+ row_number += 1
+ return samp_ids, obs_ids, data, metadata, md_name
def to_tsv(self, header_key=None, header_value=None,
metadata_formatter=str, observation_column_name='#OTU ID'):
@@ -3805,7 +4091,7 @@ html
O1 0.0 0.0 1.0
O2 1.0 3.0 42.0
"""
- return self.delimited_self('\t', header_key, header_value,
+ return self.delimited_self(u'\t', header_key, header_value,
metadata_formatter,
observation_column_name)
@@ -3861,7 +4147,7 @@ def list_list_to_sparse(data, dtype=float, shape=None):
scipy.csr_matrix
The newly generated matrix
"""
- rows, cols, values = izip(*data)
+ rows, cols, values = zip(*data)
if shape is None:
n_rows = max(rows) + 1
@@ -4055,7 +4341,7 @@ def dict_to_sparse(data, dtype=float, shape=None):
rows = []
cols = []
vals = []
- for (r, c), v in data.iteritems():
+ for (r, c), v in viewitems(data):
rows.append(r)
cols.append(c)
vals.append(v)
diff --git a/biom/util.py b/biom/util.py
index 671e597..7ecf6df 100644
--- a/biom/util.py
+++ b/biom/util.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-
+# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------------
# Copyright (c) 2011-2013, The BIOM Format Development Team.
#
@@ -9,6 +9,7 @@
# ----------------------------------------------------------------------------
import os
+import sys
import inspect
from contextlib import contextmanager
@@ -18,16 +19,19 @@ from os.path import abspath, dirname, exists
import re
from hashlib import md5
from gzip import open as gzip_open
-from warnings import warn
try:
import h5py
HAVE_H5PY = True
- H5PY_VLEN_STR = h5py.special_dtype(vlen=str)
- H5PY_VLEN_UNICODE = h5py.special_dtype(vlen=unicode)
+
+ if sys.version_info.major == 2:
+ H5PY_VLEN_STR = h5py.special_dtype(vlen=unicode) # noqa
+ H5PY_VLEN_UNICODE = h5py.special_dtype(vlen=unicode) # noqa
+ else:
+ H5PY_VLEN_STR = h5py.special_dtype(vlen=str)
+ H5PY_VLEN_UNICODE = h5py.special_dtype(vlen=str)
except ImportError:
- warn("h5py is not available")
HAVE_H5PY = False
H5PY_VLEN_STR = None
H5PY_VLEN_UNICODE = None
@@ -37,11 +41,13 @@ from numpy import mean, median, min, max
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Greg Caporaso",
- "Jose Clemente", "Justin Kuczynski"]
+ "Jose Clemente", "Justin Kuczynski", "Jorge Cañardo Alastuey"]
__license__ = "BSD"
__url__ = "http://biom-format.org"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald at colorado.edu"
+__format_version__ = (2, 1)
+__version__ = "2.1.5"
def generate_subsamples(table, n, axis='sample', by_id=False):
@@ -126,7 +132,7 @@ def unzip(items):
BSD license).
"""
if items:
- return map(list, zip(*items))
+ return list(map(list, zip(*list(items))))
else:
return []
@@ -321,13 +327,16 @@ def compute_counts_per_sample_stats(table, binary_counts=False):
sample_counts[sample_id] = (count_vector != 0).sum()
else:
sample_counts[sample_id] = float(count_vector.sum())
- counts = sample_counts.values()
+ counts = list(sample_counts.values())
- return (min(counts),
- max(counts),
- median(counts),
- mean(counts),
- sample_counts)
+ if len(counts) == 0:
+ return (0, 0, 0, 0, sample_counts)
+ else:
+ return (min(counts),
+ max(counts),
+ median(counts),
+ mean(counts),
+ sample_counts)
def safe_md5(open_file, block_size=2 ** 20):
@@ -346,7 +355,7 @@ def safe_md5(open_file, block_size=2 ** 20):
# While a little hackish, this allows this code to
# safely work either with a file object or a list of lines.
- if isinstance(open_file, file):
+ if hasattr(open_file, 'read'):
data_getter = open_file.read
data_getter_i = block_size
elif isinstance(open_file, list):
@@ -364,7 +373,7 @@ def safe_md5(open_file, block_size=2 ** 20):
while data:
data = data_getter(data_getter_i)
if data:
- result.update(data)
+ result.update(data.encode('utf-8'))
return result.hexdigest()
@@ -378,7 +387,7 @@ def is_gzip(fp):
project, but we obtained permission from the authors of this function to
port it to the BIOM Format project (and keep it under BIOM's BSD license).
"""
- return open(fp, 'rb').read(2) == '\x1f\x8b'
+ return open(fp, 'rb').read(2) == b'\x1f\x8b'
@contextmanager
@@ -410,6 +419,14 @@ def biom_open(fp, permission='U'):
qiime_open. QIIME is a GPL project, but we obtained permission from the
authors of this function to port it to the BIOM Format project (and keep it
under BIOM's BSD license).
+
+ Raises
+ ------
+ RuntimeError
+ If the user tries to parse an HDF5 file without having h5py installed.
+ ValueError
+ If the user tries to read an empty file.
+
"""
if permission not in ['r', 'w', 'U', 'rb', 'wb']:
raise IOError("Unknown mode: %s" % permission)
@@ -417,6 +434,16 @@ def biom_open(fp, permission='U'):
opener = open
mode = permission
+ # don't try to open an HDF5 file if H5PY is not installed, this can only
+ # happen if we are reading a file
+ if mode in {'r', 'rb', 'U'}:
+ if os.path.getsize(fp) == 0:
+ raise ValueError("The file '%s' is empty and can't be parsed" % fp)
+
+ if is_hdf5_file(fp) and not HAVE_H5PY:
+ raise RuntimeError("h5py is not installed, cannot parse HDF5 "
+ "BIOM file")
+
if HAVE_H5PY:
if mode in ['U', 'r', 'rb'] and h5py.is_hdf5(fp):
opener = h5py.File
@@ -468,3 +495,21 @@ def get_data_path(fn):
path = os.path.dirname(os.path.abspath(callers_filename))
data_path = os.path.join(path, 'test_data', fn)
return data_path
+
+
+def is_hdf5_file(fp):
+ """Guess if file is HDF5.
+
+ Parameters
+ ----------
+ fp : str
+ Path to the file
+
+ Returns
+ -------
+ bool
+ Whether the file is an HDF5 file
+ """
+ with open(fp, 'rb') as f:
+ # from the HDF5 documentation about format signature
+ return f.read(8) == b'\x89HDF\r\n\x1a\n'
diff --git a/doc/conf.py b/doc/conf.py
index 43cf358..b8f3658 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -59,15 +59,15 @@ master_doc = 'index'
# General information about the project.
project = u'biom-format'
-copyright = u'2011-2013, The BIOM Format Development Team'
+copyright = u'2011-2015, The BIOM Format Development Team'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
-version = "2.1"
-release = "2.1"
+version = "2.1.5"
+release = "2.1.5"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/doc/documentation/biom_conversion.rst b/doc/documentation/biom_conversion.rst
index b77bd48..c305b07 100644
--- a/doc/documentation/biom_conversion.rst
+++ b/doc/documentation/biom_conversion.rst
@@ -56,6 +56,6 @@ If you are converting a QIIME 1.4.0 or earlier OTU table to BIOM format, there a
Then, you'll want to perform the conversion including a step to convert the taxonomy `string` from the classic OTU table to a taxonomy `list`, as it's represented in QIIME 1.4.0-dev and later::
- biom convert -i otu_table.taxonomy.txt -o otu_table.from_txt.biom --table-type="OTU table" --process-obs-metadata taxonomy
+ biom convert -i otu_table.taxonomy.txt -o otu_table.from_txt.biom --table-type="OTU table" --process-obs-metadata taxonomy --to-hdf5
diff --git a/doc/documentation/generated/biom.load_table.rst b/doc/documentation/generated/biom.load_table.rst
deleted file mode 100644
index ea700bd..0000000
--- a/doc/documentation/generated/biom.load_table.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-biom.load_table
-===============
-
-.. currentmodule:: biom
-
-.. autofunction:: load_table
\ No newline at end of file
diff --git a/doc/documentation/generated/biom.table.Table.__eq__.rst b/doc/documentation/generated/biom.table.Table.__eq__.rst
deleted file mode 100644
index a6073b0..0000000
--- a/doc/documentation/generated/biom.table.Table.__eq__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__eq__
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__eq__
diff --git a/doc/documentation/generated/biom.table.Table.__format__.rst b/doc/documentation/generated/biom.table.Table.__format__.rst
deleted file mode 100644
index c7a1cd1..0000000
--- a/doc/documentation/generated/biom.table.Table.__format__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__format__
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__format__
diff --git a/doc/documentation/generated/biom.table.Table.__getitem__.rst b/doc/documentation/generated/biom.table.Table.__getitem__.rst
deleted file mode 100644
index c7f2eb4..0000000
--- a/doc/documentation/generated/biom.table.Table.__getitem__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__getitem__
-============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__getitem__
diff --git a/doc/documentation/generated/biom.table.Table.__init__.rst b/doc/documentation/generated/biom.table.Table.__init__.rst
deleted file mode 100644
index 67a8ddd..0000000
--- a/doc/documentation/generated/biom.table.Table.__init__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__init__
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__init__
diff --git a/doc/documentation/generated/biom.table.Table.__iter__.rst b/doc/documentation/generated/biom.table.Table.__iter__.rst
deleted file mode 100644
index b8e7b9f..0000000
--- a/doc/documentation/generated/biom.table.Table.__iter__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__iter__
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__iter__
diff --git a/doc/documentation/generated/biom.table.Table.__ne__.rst b/doc/documentation/generated/biom.table.Table.__ne__.rst
deleted file mode 100644
index 9b7094c..0000000
--- a/doc/documentation/generated/biom.table.Table.__ne__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__ne__
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__ne__
diff --git a/doc/documentation/generated/biom.table.Table.__new__.rst b/doc/documentation/generated/biom.table.Table.__new__.rst
deleted file mode 100644
index d0fc904..0000000
--- a/doc/documentation/generated/biom.table.Table.__new__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__new__
-========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__new__
diff --git a/doc/documentation/generated/biom.table.Table.__reduce__.rst b/doc/documentation/generated/biom.table.Table.__reduce__.rst
deleted file mode 100644
index 1fdf4c3..0000000
--- a/doc/documentation/generated/biom.table.Table.__reduce__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__reduce__
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__reduce__
diff --git a/doc/documentation/generated/biom.table.Table.__reduce_ex__.rst b/doc/documentation/generated/biom.table.Table.__reduce_ex__.rst
deleted file mode 100644
index d30067c..0000000
--- a/doc/documentation/generated/biom.table.Table.__reduce_ex__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__reduce_ex__
-==============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__reduce_ex__
diff --git a/doc/documentation/generated/biom.table.Table.__repr__.rst b/doc/documentation/generated/biom.table.Table.__repr__.rst
deleted file mode 100644
index f73f5be..0000000
--- a/doc/documentation/generated/biom.table.Table.__repr__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__repr__
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__repr__
diff --git a/doc/documentation/generated/biom.table.Table.__sizeof__.rst b/doc/documentation/generated/biom.table.Table.__sizeof__.rst
deleted file mode 100644
index e18e62f..0000000
--- a/doc/documentation/generated/biom.table.Table.__sizeof__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__sizeof__
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__sizeof__
diff --git a/doc/documentation/generated/biom.table.Table.__str__.rst b/doc/documentation/generated/biom.table.Table.__str__.rst
deleted file mode 100644
index 54452f5..0000000
--- a/doc/documentation/generated/biom.table.Table.__str__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__str__
-========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__str__
diff --git a/doc/documentation/generated/biom.table.Table.__subclasshook__.rst b/doc/documentation/generated/biom.table.Table.__subclasshook__.rst
deleted file mode 100644
index 5332175..0000000
--- a/doc/documentation/generated/biom.table.Table.__subclasshook__.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.__subclasshook__
-=================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.__subclasshook__
diff --git a/doc/documentation/generated/biom.table.Table._axis_to_num.rst b/doc/documentation/generated/biom.table.Table._axis_to_num.rst
deleted file mode 100644
index 84ce032..0000000
--- a/doc/documentation/generated/biom.table.Table._axis_to_num.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._axis_to_num
-=============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._axis_to_num
diff --git a/doc/documentation/generated/biom.table.Table._cast_metadata.rst b/doc/documentation/generated/biom.table.Table._cast_metadata.rst
deleted file mode 100644
index 475365d..0000000
--- a/doc/documentation/generated/biom.table.Table._cast_metadata.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._cast_metadata
-===============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._cast_metadata
diff --git a/doc/documentation/generated/biom.table.Table._conv_to_self_type.rst b/doc/documentation/generated/biom.table.Table._conv_to_self_type.rst
deleted file mode 100644
index 0182077..0000000
--- a/doc/documentation/generated/biom.table.Table._conv_to_self_type.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._conv_to_self_type
-===================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._conv_to_self_type
diff --git a/doc/documentation/generated/biom.table.Table._data_equality.rst b/doc/documentation/generated/biom.table.Table._data_equality.rst
deleted file mode 100644
index 16d958f..0000000
--- a/doc/documentation/generated/biom.table.Table._data_equality.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._data_equality
-===============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._data_equality
diff --git a/doc/documentation/generated/biom.table.Table._extract_data_from_tsv.rst b/doc/documentation/generated/biom.table.Table._extract_data_from_tsv.rst
deleted file mode 100644
index 067b426..0000000
--- a/doc/documentation/generated/biom.table.Table._extract_data_from_tsv.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._extract_data_from_tsv
-=======================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._extract_data_from_tsv
diff --git a/doc/documentation/generated/biom.table.Table._get_col.rst b/doc/documentation/generated/biom.table.Table._get_col.rst
deleted file mode 100644
index 97dfd0a..0000000
--- a/doc/documentation/generated/biom.table.Table._get_col.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._get_col
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._get_col
diff --git a/doc/documentation/generated/biom.table.Table._get_row.rst b/doc/documentation/generated/biom.table.Table._get_row.rst
deleted file mode 100644
index 6000fd4..0000000
--- a/doc/documentation/generated/biom.table.Table._get_row.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._get_row
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._get_row
diff --git a/doc/documentation/generated/biom.table.Table._get_sparse_data.rst b/doc/documentation/generated/biom.table.Table._get_sparse_data.rst
deleted file mode 100644
index 2275aca..0000000
--- a/doc/documentation/generated/biom.table.Table._get_sparse_data.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._get_sparse_data
-=================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._get_sparse_data
diff --git a/doc/documentation/generated/biom.table.Table._index.rst b/doc/documentation/generated/biom.table.Table._index.rst
deleted file mode 100644
index a784764..0000000
--- a/doc/documentation/generated/biom.table.Table._index.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._index
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._index
diff --git a/doc/documentation/generated/biom.table.Table._index_ids.rst b/doc/documentation/generated/biom.table.Table._index_ids.rst
deleted file mode 100644
index b4e6967..0000000
--- a/doc/documentation/generated/biom.table.Table._index_ids.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._index_ids
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._index_ids
diff --git a/doc/documentation/generated/biom.table.Table._intersect_id_order.rst b/doc/documentation/generated/biom.table.Table._intersect_id_order.rst
deleted file mode 100644
index 661077c..0000000
--- a/doc/documentation/generated/biom.table.Table._intersect_id_order.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._intersect_id_order
-====================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._intersect_id_order
diff --git a/doc/documentation/generated/biom.table.Table._invert_axis.rst b/doc/documentation/generated/biom.table.Table._invert_axis.rst
deleted file mode 100644
index 6dcb249..0000000
--- a/doc/documentation/generated/biom.table.Table._invert_axis.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._invert_axis
-=============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._invert_axis
diff --git a/doc/documentation/generated/biom.table.Table._iter_obs.rst b/doc/documentation/generated/biom.table.Table._iter_obs.rst
deleted file mode 100644
index 7a2df12..0000000
--- a/doc/documentation/generated/biom.table.Table._iter_obs.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._iter_obs
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._iter_obs
diff --git a/doc/documentation/generated/biom.table.Table._iter_samp.rst b/doc/documentation/generated/biom.table.Table._iter_samp.rst
deleted file mode 100644
index 70f4b58..0000000
--- a/doc/documentation/generated/biom.table.Table._iter_samp.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._iter_samp
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._iter_samp
diff --git a/doc/documentation/generated/biom.table.Table._to_dense.rst b/doc/documentation/generated/biom.table.Table._to_dense.rst
deleted file mode 100644
index 4ea8596..0000000
--- a/doc/documentation/generated/biom.table.Table._to_dense.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._to_dense
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._to_dense
diff --git a/doc/documentation/generated/biom.table.Table._to_sparse.rst b/doc/documentation/generated/biom.table.Table._to_sparse.rst
deleted file mode 100644
index 340d814..0000000
--- a/doc/documentation/generated/biom.table.Table._to_sparse.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._to_sparse
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._to_sparse
diff --git a/doc/documentation/generated/biom.table.Table._union_id_order.rst b/doc/documentation/generated/biom.table.Table._union_id_order.rst
deleted file mode 100644
index 0f07a39..0000000
--- a/doc/documentation/generated/biom.table.Table._union_id_order.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._union_id_order
-================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._union_id_order
diff --git a/doc/documentation/generated/biom.table.Table._verify_metadata.rst b/doc/documentation/generated/biom.table.Table._verify_metadata.rst
deleted file mode 100644
index b53bb56..0000000
--- a/doc/documentation/generated/biom.table.Table._verify_metadata.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table._verify_metadata
-=================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table._verify_metadata
diff --git a/doc/documentation/generated/biom.table.Table.add_group_metadata.rst b/doc/documentation/generated/biom.table.Table.add_group_metadata.rst
deleted file mode 100644
index 1ce92da..0000000
--- a/doc/documentation/generated/biom.table.Table.add_group_metadata.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.add_group_metadata
-===================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.add_group_metadata
diff --git a/doc/documentation/generated/biom.table.Table.add_metadata.rst b/doc/documentation/generated/biom.table.Table.add_metadata.rst
deleted file mode 100644
index ddff761..0000000
--- a/doc/documentation/generated/biom.table.Table.add_metadata.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.add_metadata
-=============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.add_metadata
diff --git a/doc/documentation/generated/biom.table.Table.collapse.rst b/doc/documentation/generated/biom.table.Table.collapse.rst
deleted file mode 100644
index 3d7d66a..0000000
--- a/doc/documentation/generated/biom.table.Table.collapse.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.collapse
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.collapse
diff --git a/doc/documentation/generated/biom.table.Table.copy.rst b/doc/documentation/generated/biom.table.Table.copy.rst
deleted file mode 100644
index 03c0168..0000000
--- a/doc/documentation/generated/biom.table.Table.copy.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.copy
-=====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.copy
diff --git a/doc/documentation/generated/biom.table.Table.data.rst b/doc/documentation/generated/biom.table.Table.data.rst
deleted file mode 100644
index ae8a953..0000000
--- a/doc/documentation/generated/biom.table.Table.data.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.data
-=====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.data
diff --git a/doc/documentation/generated/biom.table.Table.delimited_self.rst b/doc/documentation/generated/biom.table.Table.delimited_self.rst
deleted file mode 100644
index 40a0fa2..0000000
--- a/doc/documentation/generated/biom.table.Table.delimited_self.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.delimited_self
-===============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.delimited_self
diff --git a/doc/documentation/generated/biom.table.Table.descriptive_equality.rst b/doc/documentation/generated/biom.table.Table.descriptive_equality.rst
deleted file mode 100644
index 7b3c389..0000000
--- a/doc/documentation/generated/biom.table.Table.descriptive_equality.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.descriptive_equality
-=====================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.descriptive_equality
diff --git a/doc/documentation/generated/biom.table.Table.dtype.rst b/doc/documentation/generated/biom.table.Table.dtype.rst
deleted file mode 100644
index 949157a..0000000
--- a/doc/documentation/generated/biom.table.Table.dtype.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.dtype
-======================
-
-.. currentmodule:: biom.table
-
-.. autoattribute:: Table.dtype
diff --git a/doc/documentation/generated/biom.table.Table.exists.rst b/doc/documentation/generated/biom.table.Table.exists.rst
deleted file mode 100644
index 6a7c624..0000000
--- a/doc/documentation/generated/biom.table.Table.exists.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.exists
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.exists
diff --git a/doc/documentation/generated/biom.table.Table.filter.rst b/doc/documentation/generated/biom.table.Table.filter.rst
deleted file mode 100644
index f4a3174..0000000
--- a/doc/documentation/generated/biom.table.Table.filter.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.filter
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.filter
diff --git a/doc/documentation/generated/biom.table.Table.from_hdf5.rst b/doc/documentation/generated/biom.table.Table.from_hdf5.rst
deleted file mode 100644
index ce8cfe5..0000000
--- a/doc/documentation/generated/biom.table.Table.from_hdf5.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.from_hdf5
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.from_hdf5
diff --git a/doc/documentation/generated/biom.table.Table.from_json.rst b/doc/documentation/generated/biom.table.Table.from_json.rst
deleted file mode 100644
index 62d9899..0000000
--- a/doc/documentation/generated/biom.table.Table.from_json.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.from_json
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.from_json
diff --git a/doc/documentation/generated/biom.table.Table.from_tsv.rst b/doc/documentation/generated/biom.table.Table.from_tsv.rst
deleted file mode 100644
index 74cab74..0000000
--- a/doc/documentation/generated/biom.table.Table.from_tsv.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.from_tsv
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.from_tsv
diff --git a/doc/documentation/generated/biom.table.Table.get_table_density.rst b/doc/documentation/generated/biom.table.Table.get_table_density.rst
deleted file mode 100644
index fcc3ef0..0000000
--- a/doc/documentation/generated/biom.table.Table.get_table_density.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.get_table_density
-==================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.get_table_density
diff --git a/doc/documentation/generated/biom.table.Table.get_value_by_ids.rst b/doc/documentation/generated/biom.table.Table.get_value_by_ids.rst
deleted file mode 100644
index 8976011..0000000
--- a/doc/documentation/generated/biom.table.Table.get_value_by_ids.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.get_value_by_ids
-=================================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.get_value_by_ids
diff --git a/doc/documentation/generated/biom.table.Table.group_metadata.rst b/doc/documentation/generated/biom.table.Table.group_metadata.rst
deleted file mode 100644
index e2cbb50..0000000
--- a/doc/documentation/generated/biom.table.Table.group_metadata.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.group_metadata
-===============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.group_metadata
diff --git a/doc/documentation/generated/biom.table.Table.ids.rst b/doc/documentation/generated/biom.table.Table.ids.rst
deleted file mode 100644
index 10d1f9d..0000000
--- a/doc/documentation/generated/biom.table.Table.ids.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.ids
-====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.ids
diff --git a/doc/documentation/generated/biom.table.Table.index.rst b/doc/documentation/generated/biom.table.Table.index.rst
deleted file mode 100644
index c1ab1bd..0000000
--- a/doc/documentation/generated/biom.table.Table.index.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.index
-======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.index
diff --git a/doc/documentation/generated/biom.table.Table.is_empty.rst b/doc/documentation/generated/biom.table.Table.is_empty.rst
deleted file mode 100644
index 02a344a..0000000
--- a/doc/documentation/generated/biom.table.Table.is_empty.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.is_empty
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.is_empty
diff --git a/doc/documentation/generated/biom.table.Table.iter.rst b/doc/documentation/generated/biom.table.Table.iter.rst
deleted file mode 100644
index 7773a11..0000000
--- a/doc/documentation/generated/biom.table.Table.iter.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.iter
-=====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.iter
diff --git a/doc/documentation/generated/biom.table.Table.iter_data.rst b/doc/documentation/generated/biom.table.Table.iter_data.rst
deleted file mode 100644
index 9c3d244..0000000
--- a/doc/documentation/generated/biom.table.Table.iter_data.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.iter_data
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.iter_data
diff --git a/doc/documentation/generated/biom.table.Table.iter_pairwise.rst b/doc/documentation/generated/biom.table.Table.iter_pairwise.rst
deleted file mode 100644
index 4a5eef8..0000000
--- a/doc/documentation/generated/biom.table.Table.iter_pairwise.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.iter_pairwise
-==============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.iter_pairwise
diff --git a/doc/documentation/generated/biom.table.Table.matrix_data.rst b/doc/documentation/generated/biom.table.Table.matrix_data.rst
deleted file mode 100644
index ce01e7e..0000000
--- a/doc/documentation/generated/biom.table.Table.matrix_data.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.matrix_data
-============================
-
-.. currentmodule:: biom.table
-
-.. autoattribute:: Table.matrix_data
diff --git a/doc/documentation/generated/biom.table.Table.max.rst b/doc/documentation/generated/biom.table.Table.max.rst
deleted file mode 100644
index 1a5cf9d..0000000
--- a/doc/documentation/generated/biom.table.Table.max.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.max
-====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.max
diff --git a/doc/documentation/generated/biom.table.Table.merge.rst b/doc/documentation/generated/biom.table.Table.merge.rst
deleted file mode 100644
index d2caa31..0000000
--- a/doc/documentation/generated/biom.table.Table.merge.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.merge
-======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.merge
diff --git a/doc/documentation/generated/biom.table.Table.metadata.rst b/doc/documentation/generated/biom.table.Table.metadata.rst
deleted file mode 100644
index 7e561d2..0000000
--- a/doc/documentation/generated/biom.table.Table.metadata.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.metadata
-=========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.metadata
diff --git a/doc/documentation/generated/biom.table.Table.min.rst b/doc/documentation/generated/biom.table.Table.min.rst
deleted file mode 100644
index 8bc98bf..0000000
--- a/doc/documentation/generated/biom.table.Table.min.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.min
-====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.min
diff --git a/doc/documentation/generated/biom.table.Table.nnz.rst b/doc/documentation/generated/biom.table.Table.nnz.rst
deleted file mode 100644
index 24764b9..0000000
--- a/doc/documentation/generated/biom.table.Table.nnz.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.nnz
-====================
-
-.. currentmodule:: biom.table
-
-.. autoattribute:: Table.nnz
diff --git a/doc/documentation/generated/biom.table.Table.nonzero.rst b/doc/documentation/generated/biom.table.Table.nonzero.rst
deleted file mode 100644
index cd73096..0000000
--- a/doc/documentation/generated/biom.table.Table.nonzero.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.nonzero
-========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.nonzero
diff --git a/doc/documentation/generated/biom.table.Table.nonzero_counts.rst b/doc/documentation/generated/biom.table.Table.nonzero_counts.rst
deleted file mode 100644
index bb278d2..0000000
--- a/doc/documentation/generated/biom.table.Table.nonzero_counts.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.nonzero_counts
-===============================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.nonzero_counts
diff --git a/doc/documentation/generated/biom.table.Table.norm.rst b/doc/documentation/generated/biom.table.Table.norm.rst
deleted file mode 100644
index a010b96..0000000
--- a/doc/documentation/generated/biom.table.Table.norm.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.norm
-=====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.norm
diff --git a/doc/documentation/generated/biom.table.Table.pa.rst b/doc/documentation/generated/biom.table.Table.pa.rst
deleted file mode 100644
index 542d536..0000000
--- a/doc/documentation/generated/biom.table.Table.pa.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.pa
-===================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.pa
diff --git a/doc/documentation/generated/biom.table.Table.partition.rst b/doc/documentation/generated/biom.table.Table.partition.rst
deleted file mode 100644
index f3e64b5..0000000
--- a/doc/documentation/generated/biom.table.Table.partition.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.partition
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.partition
diff --git a/doc/documentation/generated/biom.table.Table.reduce.rst b/doc/documentation/generated/biom.table.Table.reduce.rst
deleted file mode 100644
index 55c3a36..0000000
--- a/doc/documentation/generated/biom.table.Table.reduce.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.reduce
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.reduce
diff --git a/doc/documentation/generated/biom.table.Table.rst b/doc/documentation/generated/biom.table.Table.rst
deleted file mode 100644
index 26c7076..0000000
--- a/doc/documentation/generated/biom.table.Table.rst
+++ /dev/null
@@ -1,98 +0,0 @@
-biom.table.Table
-================
-
-.. currentmodule:: biom.table
-
-.. autoclass:: Table
-
-
-
- .. HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
- .. autosummary::
- :toctree:
-
- Table.__eq__
- Table.__format__
- Table.__getitem__
- Table.__init__
- Table.__iter__
- Table.__ne__
- Table.__new__
- Table.__reduce__
- Table.__reduce_ex__
- Table.__repr__
- Table.__sizeof__
- Table.__str__
- Table.__subclasshook__
- Table._axis_to_num
- Table._cast_metadata
- Table._conv_to_self_type
- Table._data_equality
- Table._extract_data_from_tsv
- Table._get_col
- Table._get_row
- Table._get_sparse_data
- Table._index
- Table._index_ids
- Table._intersect_id_order
- Table._invert_axis
- Table._iter_obs
- Table._iter_samp
- Table._to_dense
- Table._to_sparse
- Table._union_id_order
- Table._verify_metadata
- Table.add_group_metadata
- Table.add_metadata
- Table.collapse
- Table.copy
- Table.data
- Table.delimited_self
- Table.descriptive_equality
- Table.exists
- Table.filter
- Table.from_hdf5
- Table.from_json
- Table.from_tsv
- Table.get_table_density
- Table.get_value_by_ids
- Table.group_metadata
- Table.ids
- Table.index
- Table.is_empty
- Table.iter
- Table.iter_data
- Table.iter_pairwise
- Table.max
- Table.merge
- Table.metadata
- Table.min
- Table.nonzero
- Table.nonzero_counts
- Table.norm
- Table.pa
- Table.partition
- Table.reduce
- Table.sort
- Table.sort_order
- Table.subsample
- Table.sum
- Table.to_hdf5
- Table.to_json
- Table.to_tsv
- Table.transform
- Table.transpose
-
-
-
-
-
- .. HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
- .. autosummary::
- :toctree:
-
- Table.dtype
- Table.matrix_data
- Table.nnz
- Table.shape
-
diff --git a/doc/documentation/generated/biom.table.Table.shape.rst b/doc/documentation/generated/biom.table.Table.shape.rst
deleted file mode 100644
index 2d7155a..0000000
--- a/doc/documentation/generated/biom.table.Table.shape.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.shape
-======================
-
-.. currentmodule:: biom.table
-
-.. autoattribute:: Table.shape
diff --git a/doc/documentation/generated/biom.table.Table.sort.rst b/doc/documentation/generated/biom.table.Table.sort.rst
deleted file mode 100644
index 5b49329..0000000
--- a/doc/documentation/generated/biom.table.Table.sort.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.sort
-=====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.sort
diff --git a/doc/documentation/generated/biom.table.Table.sort_order.rst b/doc/documentation/generated/biom.table.Table.sort_order.rst
deleted file mode 100644
index a2c4389..0000000
--- a/doc/documentation/generated/biom.table.Table.sort_order.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.sort_order
-===========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.sort_order
diff --git a/doc/documentation/generated/biom.table.Table.subsample.rst b/doc/documentation/generated/biom.table.Table.subsample.rst
deleted file mode 100644
index 1ebaca4..0000000
--- a/doc/documentation/generated/biom.table.Table.subsample.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.subsample
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.subsample
diff --git a/doc/documentation/generated/biom.table.Table.sum.rst b/doc/documentation/generated/biom.table.Table.sum.rst
deleted file mode 100644
index d206871..0000000
--- a/doc/documentation/generated/biom.table.Table.sum.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.sum
-====================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.sum
diff --git a/doc/documentation/generated/biom.table.Table.to_hdf5.rst b/doc/documentation/generated/biom.table.Table.to_hdf5.rst
deleted file mode 100644
index 61d9bb2..0000000
--- a/doc/documentation/generated/biom.table.Table.to_hdf5.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.to_hdf5
-========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.to_hdf5
diff --git a/doc/documentation/generated/biom.table.Table.to_json.rst b/doc/documentation/generated/biom.table.Table.to_json.rst
deleted file mode 100644
index c5db3eb..0000000
--- a/doc/documentation/generated/biom.table.Table.to_json.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.to_json
-========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.to_json
diff --git a/doc/documentation/generated/biom.table.Table.to_tsv.rst b/doc/documentation/generated/biom.table.Table.to_tsv.rst
deleted file mode 100644
index 73baf4c..0000000
--- a/doc/documentation/generated/biom.table.Table.to_tsv.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.to_tsv
-=======================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.to_tsv
diff --git a/doc/documentation/generated/biom.table.Table.transform.rst b/doc/documentation/generated/biom.table.Table.transform.rst
deleted file mode 100644
index 8a4bb92..0000000
--- a/doc/documentation/generated/biom.table.Table.transform.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.transform
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.transform
diff --git a/doc/documentation/generated/biom.table.Table.transpose.rst b/doc/documentation/generated/biom.table.Table.transpose.rst
deleted file mode 100644
index 916b350..0000000
--- a/doc/documentation/generated/biom.table.Table.transpose.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-:orphan:
-
-biom.table.Table.transpose
-==========================
-
-.. currentmodule:: biom.table
-
-.. automethod:: Table.transpose
diff --git a/doc/index.rst b/doc/index.rst
index 704c2ac..92308ed 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -9,9 +9,13 @@ The `BIOM file format <http://www.biom-format.org>`_ (canonically pronounced `bi
The `BIOM format <http://www.biom-format.org>`_ is designed for general use in broad areas of comparative -omics. For example, in marker-gene surveys, the primary use of this format is to represent OTU tables: the observations in this case are OTUs and the matrix contains counts corresponding to the number of times each OTU is observed in each sample. With respect to metagenome data, this format would be used to represent metagenome tables: the observations in this case might correspond [...]
-There are two components to the BIOM project: first is the `definition of the BIOM format <./documentation/biom_format.html>`_, and second is `development of support objects <./documentation/table_objects.html>`_ in multiple programming languages to support the use of BIOM in diverse bioinformatics applications. The version of the BIOM file format is independent of the version of the `biom-format` software.
+The BIOM project consists of the following components:
-There are official implementations of BIOM format support objects (APIs) in the Python and R programming languages. The rest of this site contains details about the BIOM file format (which is independent of the API) and the Python ``biom-format`` API. For more details about the R API, please see the `CRAN biom package <http://cran.r-project.org/web/packages/biom/index.html>`_.
+* `definition of the BIOM file format <./documentation/biom_format.html>`_;
+* command line interface (CLI) for working with BIOM files, including `converting between file formats <./documentation/biom_conversion.html>`_, `adding metadata to BIOM files <./documentation/adding_metadata.html>`_, and `summarizing BIOM files <./documentation/summarizing_biom_tables.html>`_ (run ``biom`` to see the full list of commands);
+* application programming interface (API) for working with BIOM files in multiple programming languages (including Python and R).
+
+The ``biom-format`` package provides a command line interface and Python API for working with BIOM files. The rest of this site contains details about the BIOM file format (which is independent of the API) and the Python ``biom-format`` package. For more details about the R API, please see the `CRAN biom package <http://cran.r-project.org/web/packages/biom/index.html>`_.
Projects using the BIOM format
==============================
@@ -25,6 +29,7 @@ Projects using the BIOM format
* `VAMPS <http://vamps.mbl.edu/>`_
* `metagenomeSeq <http://www.bioconductor.org/packages/release/bioc/html/metagenomeSeq.html>`_
* `Phinch <http://phinch.org>`_
+* `RDP Classifier <https://github.com/rdpstaff/classifier>`_
If you are using BIOM in your project, and would like your project to be listed, please submit a `pull request <https://github.com/biocore/biom-format/pulls>`_ to the BIOM project. More information on `submitting pull requests can be found here <https://help.github.com/articles/using-pull-requests>`_.
@@ -42,77 +47,30 @@ BIOM version
The latest official version of the biom-format project is |release| and of the BIOM file format is 2.0. Details on the `file format can be found here <./documentation/biom_format.html>`_.
-Installing the biom-format project
-==================================
-
-To install the ``biom-format`` project, you can download the `latest version here <https://pypi.python.org/pypi/biom-format/>`_, or work with the development version. Generally we recommend working with the release version as it will be more stable, but if you want access to the latest features (and can tolerate some instability) you should work with the development version.
-
-The biom-format project has the following dependencies:
-
- * `Python <http://www.python.org/>`_ >= 2.7 and < 3.0
- * `numpy <http://www.numpy.org/>`_ >= 1.7.0
- * `pyqi <http://pyqi.readthedocs.org>`_ 0.3.2
- * `scipy <http://www.scipy.org/>`_ >= 0.13.0
- * `h5py <http://www.h5py.org/>`_ >= 2.20.0 (optional; must be installed if creating or reading HDF5 formatted files)
-
-The easiest way to install the latest version of the biom-format project and its required dependencies is via pip::
-
- pip install numpy
- pip install biom-format
-
-That's it!
-
-If you decided not to install biom-format using pip, it is also possible to manually install the latest release. We'll illustrate the install process in the ``$HOME/code`` directory. You can either work in this directory on your system (creating it, if necessary, by running ``mkdir $HOME/code``) or replace all occurrences of ``$HOME/code`` in the following instructions with your working directory. Please note that ``numpy`` must be in your installed prior to installing ``biom-format``. C [...]
-
- cd $HOME/code
-
-Download the `latest release, which can be found here <https://pypi.python.org/pypi/biom-format>`_. After downloading, unpack and install (note: x.y.z refers to the downloaded version)::
+Installing the ``biom-format`` Python package
+=============================================
- tar xzf biom-format-x.y.z.tar.gz
- cd $HOME/code/biom-format-x.y.z
+To install the latest release of the ``biom-format`` Python package::
-Alternatively, to install the development version, pull it from GitHub, and change to the resulting directory::
+ pip install numpy
+ pip install biom-format
- git clone git://github.com/biocore/biom-format.git
- cd $HOME/code/biom-format
+To work with BIOM 2.0+ files::
-To install (either the development or release version), follow these steps::
+ pip install h5py
- sudo python setup.py install
+To see a list of all ``biom`` commands, run::
-If you do not have sudo access on your system (or don't want to install the ``biom-format`` project in the default location) you'll need to install the library code and scripts in specified directories, and then tell your system where to look for those files. You can do this as follows::
+ biom
- echo "export PATH=$HOME/bin/:$PATH" >> $HOME/.bashrc
- echo "export PYTHONPATH=$HOME/lib/:$PYTHONPATH" >> $HOME/.bashrc
- mkdir -p $HOME/bin $HOME/lib/
- source $HOME/.bashrc
- python setup.py install --install-scripts=$HOME/bin/ --install-purelib=$HOME/lib/ --install-lib=$HOME/lib/
+To enable Bash tab completion of ``biom`` commands, add the following line to ``$HOME/.bashrc`` (if on Linux) or ``$HOME/.bash_profile`` (if on Mac OS X)::
-You should then have access to the biom-format project. You can test this by running the following command::
-
- python -c "from biom import __version__; print __version__"
+ eval "$(_BIOM_COMPLETE=source biom)"
-You should see the current version of the biom-format project.
-
-Next you can run::
-
- which biom
-
-You should get a file path ending with ``biom`` printed to your screen if it is installed correctly. Finally, to see a list of all ``biom`` commands, run::
-
- biom
-
-Enabling tab completion of biom commands
-----------------------------------------
-
-The ``biom`` command referenced in the previous section is a driver for commands in biom-format, powered by `the pyqi project <http://biocore.github.io/pyqi>`_. You can enable tab completion of biom command names and command options (meaning that when you begin typing the name of a command or option you can auto-complete it by hitting the *tab* key) by following a few simple steps from the pyqi documentation. While this step is optional, tab completion is very convenient so it's worth enabling.
-
-To enable tab completion, follow the steps outlined under `Configuring bash completion <http://biocore.github.io/pyqi/doc/tutorials/defining_your_command_driver.html#configuring-bash-completion>`_ in the pyqi install documentation, substituting ``biom`` for ``my-project`` and ``my_project`` in all commands. After completing those steps and closing and re-opening your terminal, auto-completion should be enabled.
-
-BIOM format in R
-================
+Installing the ``biom`` R package
+=================================
-There is also a BIOM format package for R, called ``biom``. This package includes basic tools for reading biom-format files, accessing and subsetting data tables from a biom object, as well as limited support for writing a biom-object back to a biom-format file. The design of this API is intended to match the python API and other tools included with the biom-format project, but with a decidedly "R flavor" that should be familiar to R users. This includes S4 classes and methods, as well a [...]
+There is also a BIOM format package for R called ``biom``. This package includes basic tools for reading biom-format files, accessing and subsetting data tables from a biom object, as well as limited support for writing a biom-object back to a biom-format file. The design of this API is intended to match the python API and other tools included with the biom-format project, but with a decidedly "R flavor" that should be familiar to R users. This includes S4 classes and methods, as well as [...]
To install the latest stable release of the ``biom`` package enter the following command from within an R session::
@@ -128,7 +86,7 @@ Please post any support or feature requests and bugs to `the biom issue tracker
See `the biom project on GitHub <https://github.com/joey711/biom/>`_ for further details, or if you would like to contribute.
-Note that the licenses between the ``biom`` R package (GPL-2) and the other biom-format software (Modified BSD) are different.
+Note that the licenses between the ``biom`` R package (GPL-2) and the ``biom-format`` Python package (Modified BSD) are different.
Citing the BIOM project
=======================
@@ -142,4 +100,4 @@ You can cite the BIOM format as follows (`link <http://www.gigasciencejournal.co
Development team
================
-The biom-format project was conceived of and developed by the `QIIME <http://www.qiime.org>`_, `MG-RAST <http://metagenomics.anl.gov>`_, and `VAMPS <http://vamps.mbl.edu/>`_ development groups to support interoperability of our software packages. If you have questions about the biom-format project you can contact gregcaporaso at gmail.com.
+The biom-format project was conceived of and developed by the `QIIME <http://www.qiime.org>`_, `MG-RAST <http://metagenomics.anl.gov>`_, and `VAMPS <http://vamps.mbl.edu/>`_ development groups to support interoperability of our software packages. If you have questions about the biom-format project please post them on the `QIIME Forum <http://forum.qiime.org>`_.
diff --git a/examples/asasd b/examples/asasd
deleted file mode 100644
index 8fbee69..0000000
--- a/examples/asasd
+++ /dev/null
@@ -1,7 +0,0 @@
-# Constructed from biom file
-#OTU ID Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 taxonomy
-GG_OTU_1 0.0 0.0 1.0 0.0 0.0 0.0 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
-GG_OTU_2 5.0 1.0 0.0 2.0 3.0 1.0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
-GG_OTU_3 0.0 0.0 1.0 4.0 0.0 2.0 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__
-GG_OTU_4 2.0 1.0 1.0 0.0 0.0 1.0 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__Halanaerobiumsaccharolyticum
-GG_OTU_5 0.0 1.0 1.0 0.0 0.0 0.0 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
diff --git a/examples/asd.hdf5 b/examples/asd.hdf5
deleted file mode 100644
index 88ba1a2..0000000
Binary files a/examples/asd.hdf5 and /dev/null differ
diff --git a/examples/asdasdsd b/examples/asdasdsd
deleted file mode 100644
index 53a7b6c..0000000
Binary files a/examples/asdasdsd and /dev/null differ
diff --git a/examples/bar_hdf5 b/examples/bar_hdf5
deleted file mode 100644
index 7b2832d..0000000
Binary files a/examples/bar_hdf5 and /dev/null differ
diff --git a/examples/bar_json b/examples/bar_json
deleted file mode 100644
index 97d6234..0000000
--- a/examples/bar_json
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "No Table ID","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","generated_by": "BIOM-Format 2.0.0-dev","date": "2014-06-03T16:06:23.407122","type": "OTU table","matrix_element_type": "float","shape": [5, 6],"data": [[0,2,1.0],[1,0,5.0],[1,1,1.0],[1,3,2.0],[1,4,3.0],[1,5,1.0],[2,2,1.0],[2,3,4.0],[2,5,2.0],[3,0,2.0],[3,1,1.0],[3,2,1.0],[3,5,1.0],[4,1,1.0],[4,2,1.0]],"rows": [{"id": "GG_OTU_1", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteob [...]
\ No newline at end of file
diff --git a/examples/bar_tsv b/examples/bar_tsv
deleted file mode 100644
index e64257d..0000000
--- a/examples/bar_tsv
+++ /dev/null
@@ -1,7 +0,0 @@
-# Constructed from biom file
-#OTU ID Sample1 Sample2 Sample3 Sample4 Sample5 Sample6
-GG_OTU_1 0.0 0.0 1.0 0.0 0.0 0.0
-GG_OTU_2 5.0 1.0 0.0 2.0 3.0 1.0
-GG_OTU_3 0.0 0.0 1.0 4.0 0.0 2.0
-GG_OTU_4 2.0 1.0 1.0 0.0 0.0 1.0
-GG_OTU_5 0.0 1.0 1.0 0.0 0.0 0.0
diff --git a/examples/foo_hdf5 b/examples/foo_hdf5
deleted file mode 100644
index bc8bb1a..0000000
Binary files a/examples/foo_hdf5 and /dev/null differ
diff --git a/examples/foo_json b/examples/foo_json
deleted file mode 100644
index 697578d..0000000
--- a/examples/foo_json
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","generated_by": "BIOM-Format 2.0.0-dev","date": "2014-06-03T16:05:29.974507","type": "OTU table","matrix_element_type": "float","shape": [5, 6],"data": [[0,2,1.0],[1,0,5.0],[1,1,1.0],[1,3,2.0],[1,4,3.0],[1,5,1.0],[2,2,1.0],[2,3,4.0],[2,5,2.0],[3,0,2.0],[3,1,1.0],[3,2,1.0],[3,5,1.0],[4,1,1.0],[4,2,1.0]],"rows": [{"id": "GG_OTU_1", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria [...]
\ No newline at end of file
diff --git a/examples/foo_tsv b/examples/foo_tsv
deleted file mode 100644
index e64257d..0000000
--- a/examples/foo_tsv
+++ /dev/null
@@ -1,7 +0,0 @@
-# Constructed from biom file
-#OTU ID Sample1 Sample2 Sample3 Sample4 Sample5 Sample6
-GG_OTU_1 0.0 0.0 1.0 0.0 0.0 0.0
-GG_OTU_2 5.0 1.0 0.0 2.0 3.0 1.0
-GG_OTU_3 0.0 0.0 1.0 4.0 0.0 2.0
-GG_OTU_4 2.0 1.0 1.0 0.0 0.0 1.0
-GG_OTU_5 0.0 1.0 1.0 0.0 0.0 0.0
diff --git a/examples/foobar_hdf5 b/examples/foobar_hdf5
deleted file mode 100644
index 75696a2..0000000
Binary files a/examples/foobar_hdf5 and /dev/null differ
diff --git a/examples/foobar_json b/examples/foobar_json
deleted file mode 100644
index 836bbba..0000000
--- a/examples/foobar_json
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","generated_by": "BIOM-Format 2.0.0-dev","date": "2014-06-03T16:11:59.025688","type": "OTU table","matrix_element_type": "float","shape": [5, 6],"data": [[0,2,1.0],[1,0,5.0],[1,1,1.0],[1,3,2.0],[1,4,3.0],[1,5,1.0],[2,2,1.0],[2,3,4.0],[2,5,2.0],[3,0,2.0],[3,1,1.0],[3,2,1.0],[3,5,1.0],[4,1,1.0],[4,2,1.0]],"rows": [{"id": "GG_OTU_1", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria [...]
\ No newline at end of file
diff --git a/examples/foobar_json2 b/examples/foobar_json2
deleted file mode 100644
index a29ca9e..0000000
--- a/examples/foobar_json2
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","generated_by": "BIOM-Format 2.0.0-dev","date": "2014-06-03T16:15:49.313597","type": "OTU table","matrix_element_type": "float","shape": [5, 6],"data": [[0,2,1.0],[1,0,5.0],[1,1,1.0],[1,3,2.0],[1,4,3.0],[1,5,1.0],[2,2,1.0],[2,3,4.0],[2,5,2.0],[3,0,2.0],[3,1,1.0],[3,2,1.0],[3,5,1.0],[4,1,1.0],[4,2,1.0]],"rows": [{"id": "GG_OTU_1", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria [...]
\ No newline at end of file
diff --git a/examples/foobar_tsv b/examples/foobar_tsv
deleted file mode 100644
index 8fbee69..0000000
--- a/examples/foobar_tsv
+++ /dev/null
@@ -1,7 +0,0 @@
-# Constructed from biom file
-#OTU ID Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 taxonomy
-GG_OTU_1 0.0 0.0 1.0 0.0 0.0 0.0 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
-GG_OTU_2 5.0 1.0 0.0 2.0 3.0 1.0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
-GG_OTU_3 0.0 0.0 1.0 4.0 0.0 2.0 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__
-GG_OTU_4 2.0 1.0 1.0 0.0 0.0 1.0 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__Halanaerobiumsaccharolyticum
-GG_OTU_5 0.0 1.0 1.0 0.0 0.0 0.0 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
diff --git a/examples/qweqweqwasd b/examples/qweqweqwasd
deleted file mode 100644
index 5688466..0000000
--- a/examples/qweqweqwasd
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","generated_by": "BIOM-Format 1.3.1-dev","date": "2014-05-13T16:36:42.187537","matrix_element_type": "float","shape": [5, 6],"data": [[0,2,1.0],[1,0,5.0],[1,1,1.0],[1,3,2.0],[1,4,3.0],[1,5,1.0],[2,2,1.0],[2,3,4.0],[2,5,2.0],[3,0,2.0],[3,1,1.0],[3,2,1.0],[3,5,1.0],[4,1,1.0],[4,2,1.0]],"rows": [{"id": "GG_OTU_1", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria", "c__Gammaproteoba [...]
\ No newline at end of file
diff --git a/examples/rich_sparse_otu_table.biom.gz b/examples/rich_sparse_otu_table.biom.gz
deleted file mode 100644
index 1dd8c5a..0000000
Binary files a/examples/rich_sparse_otu_table.biom.gz and /dev/null differ
diff --git a/examples/wtf b/examples/wtf
deleted file mode 100644
index 0715bc8..0000000
Binary files a/examples/wtf and /dev/null differ
diff --git a/examples/wtf2 b/examples/wtf2
deleted file mode 100644
index 66a9626..0000000
--- a/examples/wtf2
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "No Table ID","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","matrix_type": "sparse","generated_by": "BIOM-Format 2.0.0-dev","date": "2014-06-03T17:27:36.407665","type": "otu table","matrix_element_type": "float","shape": [5, 6],"data": [[0,2,1.0],[1,0,5.0],[1,1,1.0],[1,3,2.0],[1,4,3.0],[1,5,1.0],[2,2,1.0],[2,3,4.0],[2,5,2.0],[3,0,2.0],[3,1,1.0],[3,2,1.0],[3,5,1.0],[4,1,1.0],[4,2,1.0]],"rows": [{"id": "GG_OTU_1", "metadata": {"taxonomy": ["k [...]
\ No newline at end of file
diff --git a/scripts/biom b/scripts/biom
deleted file mode 100755
index 6351050..0000000
--- a/scripts/biom
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/sh
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-# __author__ = "Greg Caporaso"
-# __copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
-# __credits__ = ["Daniel McDonald",
-# "Greg Caporaso",
-# "Jai Ram Rideout"]
-# __license__ = "BSD"
-# __url__ = "http://biom-format.org"
-# __version__ = "2.0.1-dev"
-# __maintainer__ = "Greg Caporaso"
-# __email__ = "gregcaporaso at gmail.com"
-
-exec pyqi --driver-name biom --command-config-module biom.interfaces.optparse.config -- "$@"
diff --git a/scripts/serve-biom b/scripts/serve-biom
deleted file mode 100644
index a4ca323..0000000
--- a/scripts/serve-biom
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-exec pyqi serve-html-interface -m biom.interfaces.html.config "$@"
diff --git a/setup.cfg b/setup.cfg
index 861a9f5..72f9d44 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[egg_info]
-tag_build =
-tag_date = 0
tag_svn_revision = 0
+tag_date = 0
+tag_build =
diff --git a/setup.py b/setup.py
index 6a043d2..0734278 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
@@ -10,9 +10,9 @@
# ----------------------------------------------------------------------------
import os
-from setuptools import setup
+import sys
+from setuptools import setup, find_packages
from setuptools.extension import Extension
-from glob import glob
try:
import numpy as np
@@ -33,9 +33,9 @@ for m in ('multiprocessing', 'logging'):
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
__credits__ = ["Greg Caporaso", "Daniel McDonald", "Jose Clemente",
- "Jai Ram Rideout", "Jorge Cañardo Alastuey"]
+ "Jai Ram Rideout", "Jorge Cañardo Alastuey", "Michael Hall"]
__license__ = "BSD"
-__version__ = "2.1"
+__version__ = "2.1.5"
__maintainer__ = "Daniel McDonald"
__email__ = "mcdonadt at colorado.edu"
@@ -79,6 +79,13 @@ if USE_CYTHON:
from Cython.Build import cythonize
extensions = cythonize(extensions)
+install_requires = ["click", "numpy >= 1.3.0", "future >= 0.14.3",
+ "scipy >= 0.13.0"]
+# HACK: for backward-compatibility with QIIME 1.9.x, pyqi must be installed.
+# pyqi is not used anymore in this project.
+if sys.version_info[0] < 3:
+ install_requires.append("pyqi")
+
setup(name='biom-format',
version=__version__,
description='Biological Observation Matrix (BIOM) format',
@@ -90,22 +97,16 @@ setup(name='biom-format',
maintainer_email=__email__,
url='http://www.biom-format.org',
test_suite='nose.collector',
- packages=['biom',
- 'biom/commands',
- 'biom/interfaces',
- 'biom/interfaces/optparse',
- 'biom/interfaces/optparse/config',
- 'biom/interfaces/html',
- 'biom/interfaces/html/config'
- ],
+ packages=find_packages(),
+ include_package_data=True,
ext_modules=extensions,
include_dirs=[np.get_include()],
- scripts=glob('scripts/*'),
- install_requires=["numpy >= 1.3.0",
- "pyqi == 0.3.2",
- "scipy >= 0.13.0"],
- extras_require={'test': ["nose >= 0.10.1", "pep8", "flake8"],
+ install_requires=install_requires,
+ extras_require={'test': ["nose >= 0.10.1", "flake8"],
'hdf5': ["h5py >= 2.2.0"]
},
- classifiers=classifiers
- )
+ classifiers=classifiers,
+ entry_points='''
+ [console_scripts]
+ biom=biom.cli:cli
+ ''')
diff --git a/biom/commands/__init__.py b/tests/__init__.py
similarity index 100%
rename from biom/commands/__init__.py
rename to tests/__init__.py
diff --git a/biom/interfaces/html/output_handler.py b/tests/test_cli/__init__.py
similarity index 78%
rename from biom/interfaces/html/output_handler.py
rename to tests/test_cli/__init__.py
index 63ce058..1e84e01 100644
--- a/biom/interfaces/html/output_handler.py
+++ b/tests/test_cli/__init__.py
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
# ----------------------------------------------------------------------------
-# Copyright (c) 2011-2013, The BIOM Format Development Team.
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
diff --git a/tests/test_cli/test_add_metadata.py b/tests/test_cli/test_add_metadata.py
new file mode 100644
index 0000000..90111d3
--- /dev/null
+++ b/tests/test_cli/test_add_metadata.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+import tempfile
+from unittest import TestCase, main
+
+import biom
+from biom.cli.metadata_adder import _add_metadata
+
+
+class TestAddMetadata(TestCase):
+
+ def setUp(self):
+ """Set up data for use in unit tests."""
+ self.cmd = _add_metadata
+ with tempfile.NamedTemporaryFile('w') as fh:
+ fh.write(biom1)
+ fh.flush()
+ self.biom_table1 = biom.load_table(fh.name)
+ self.sample_md_lines1 = sample_md1.split('\n')
+ self.obs_md_lines1 = obs_md1.split('\n')
+
+ def test_add_sample_metadata_no_casting(self):
+ """Correctly adds sample metadata without casting it."""
+ # Add a subset of sample metadata to a table that doesn't have any
+ # sample metadata to begin with. Don't perform any casting.
+ obs = self.cmd(table=self.biom_table1,
+ sample_metadata=self.sample_md_lines1)
+
+ self.assertEqual(obs.metadata()[obs.index('f4', 'sample')],
+ {'bar': '0.23', 'foo': '9', 'baz': 'abc;123'})
+ self.assertEqual(obs.metadata()[obs.index('not16S.1', 'sample')],
+ {'bar': '-4.2', 'foo': '0', 'baz': '123;abc'})
+ self.assertEqual(obs.metadata()[obs.index('f2', 'sample')], {})
+
+ def test_add_sample_metadata_with_casting(self):
+ """Correctly adds sample metadata with casting."""
+ obs = self.cmd(table=self.biom_table1,
+ sample_metadata=self.sample_md_lines1,
+ sc_separated=['baz'], int_fields=['foo'],
+ float_fields=['bar'])
+
+ self.assertEqual(obs.metadata()[obs.index('f4', 'sample')],
+ {'bar': 0.23, 'foo': 9, 'baz': ['abc', '123']})
+ self.assertEqual(obs.metadata()[obs.index('not16S.1', 'sample')],
+ {'bar': -4.2, 'foo': 0, 'baz': ['123', 'abc']})
+ self.assertEqual(obs.metadata()[obs.index('f2', 'sample')], {})
+
+ def test_add_observation_metadata_no_casting(self):
+ """Correctly adds observation metadata without casting it."""
+ # Add observation metadata to a table that already has observation
+ # metadata. Some observations won't be modified, and metadata for
+ # observations that aren't in the table are included. Don't perform any
+ # casting.
+ obs = self.cmd(table=self.biom_table1,
+ observation_metadata=self.obs_md_lines1)
+
+ metadata = obs.metadata(axis='observation')
+ self.assertEqual(
+ metadata[obs.index('None7', 'observation')],
+ {'foo': '6', 'taxonomy': 'abc;123|def;456'})
+ self.assertEqual(
+ metadata[obs.index('879972', 'observation')],
+ {'foo': '3', 'taxonomy': '123;abc|456;def'})
+ self.assertEqual(
+ metadata[obs.index('None8', 'observation')],
+ {'taxonomy': ['k__Bacteria']})
+
+ def test_add_observation_metadata_with_casting(self):
+ """Correctly adds observation metadata with casting."""
+ obs = self.cmd(table=self.biom_table1,
+ observation_metadata=self.obs_md_lines1,
+ sc_pipe_separated=['taxonomy'], int_fields=['foo'])
+
+ metadata = obs.metadata(axis='observation')
+ self.assertEqual(
+ metadata[obs.index('None7', 'observation')],
+ {'foo': 6, 'taxonomy': [['abc', '123'], ['def', '456']]})
+ self.assertEqual(
+ metadata[obs.index('879972', 'observation')],
+ {'foo': 3, 'taxonomy': [['123', 'abc'], ['456', 'def']]})
+ self.assertEqual(
+ metadata[obs.index('None8', 'observation')],
+ {'taxonomy': ['k__Bacteria']})
+
+
+biom1 = ('{"id": "None","format": "Biological Observation Matrix 1.0.0","form'
+ 'at_url": "http://biom-format.org","type": "OTU table","generated_by'
+ '": "QIIME 1.6.0-dev","date": "2013-02-09T09:30:11.550590","matrix_t'
+ 'ype": "sparse","matrix_element_type": "int","shape": [14, 9],"data"'
+ ': [[0,0,20],[0,1,18],[0,2,18],[0,3,22],[0,4,4],[1,4,1],[2,0,1],[2,4'
+ ',1],[2,5,1],[3,6,1],[4,4,1],[5,7,20],[6,4,1],[7,4,1],[7,5,1],[8,4,1'
+ '],[8,6,2],[8,8,3],[9,7,2],[10,5,1],[11,4,9],[11,5,20],[11,6,1],[11,'
+ '8,4],[12,4,3],[12,6,19],[12,8,15],[13,0,1],[13,1,4],[13,2,4]],"rows'
+ '": [{"id": "295053", "metadata": {"taxonomy": ["k__Bacteria"]}},{"i'
+ 'd": "42684", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobac'
+ 'teria"]}},{"id": "None11", "metadata": {"taxonomy": ["Unclassified"'
+ ']}},{"id": "None10", "metadata": {"taxonomy": ["Unclassified"]}},{"'
+ 'id": "None7", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "N'
+ 'one6", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None5", '
+ '"metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None4", "metadat'
+ 'a": {"taxonomy": ["Unclassified"]}},{"id": "None3", "metadata": {"t'
+ 'axonomy": ["k__Bacteria"]}},{"id": "None2", "metadata": {"taxonomy"'
+ ': ["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": ["Uncl'
+ 'assified"]}},{"id": "879972", "metadata": {"taxonomy": ["k__Bacteri'
+ 'a"]}},{"id": "None9", "metadata": {"taxonomy": ["Unclassified"]}},{'
+ '"id": "None8", "metadata": {"taxonomy": ["k__Bacteria"]}}],"columns'
+ '": [{"id": "f2", "metadata": null},{"id": "f1", "metadata": null},{'
+ '"id": "f3", "metadata": null},{"id": "f4", "metadata": null},{"id":'
+ ' "p2", "metadata": null},{"id": "p1", "metadata": null},{"id": "t1"'
+ ', "metadata": null},{"id": "not16S.1", "metadata": null},{"id": "t2'
+ '", "metadata": null}]}')
+
+sample_md1 = """#SampleID\tfoo\tbar\tbaz
+f4\t9\t0.23\tabc;123
+not16S.1\t0\t-4.2\t123;abc
+"""
+
+obs_md1 = """#OTUID\tfoo\ttaxonomy
+None7\t6\tabc;123|def;456
+best-observation\t8\tghi;789|jkl;101112
+879972\t3\t123;abc|456;def
+"""
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/test_cli/test_show_install_info.py b/tests/test_cli/test_show_install_info.py
new file mode 100644
index 0000000..35cd63a
--- /dev/null
+++ b/tests/test_cli/test_show_install_info.py
@@ -0,0 +1,24 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+import unittest
+
+from biom.cli.installation_informer import _show_install_info
+
+
+class TestShowInstallInfo(unittest.TestCase):
+ def test_default(self):
+ # Not really sure what to specifically test here, as this information
+ # will change on a per-install basis. Just make sure the code is being
+ # exercised and we have some output.
+ obs = _show_install_info()
+ self.assertTrue(len(obs) > 0)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_cli/test_subset_table.py b/tests/test_cli/test_subset_table.py
new file mode 100644
index 0000000..f3d8265
--- /dev/null
+++ b/tests/test_cli/test_subset_table.py
@@ -0,0 +1,122 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+import os
+import unittest
+
+import numpy.testing as npt
+
+from biom.cli.table_subsetter import _subset_table
+from biom.parse import parse_biom_table
+from biom.util import HAVE_H5PY
+
+
+class TestSubsetTable(unittest.TestCase):
+ def setUp(self):
+ """Set up data for use in unit tests."""
+ self.biom_str1 = biom1
+
+ def test_subset_samples(self):
+ """Correctly subsets samples in a table."""
+ obs = _subset_table(json_table_str=self.biom_str1, axis='sample',
+ ids=['f4', 'f2'], hdf5_biom=None)
+ obs = parse_biom_table(list(obs[0]))
+ self.assertEqual(len(obs.ids()), 2)
+ self.assertEqual(len(obs.ids(axis='observation')), 14)
+ self.assertTrue('f4' in obs.ids())
+ self.assertTrue('f2' in obs.ids())
+
+ def test_subset_observations(self):
+ """Correctly subsets observations in a table."""
+ obs = _subset_table(json_table_str=self.biom_str1, axis='observation',
+ ids=['None2', '879972'], hdf5_biom=None)
+ obs = parse_biom_table(list(obs[0]))
+ self.assertEqual(len(obs.ids()), 9)
+ self.assertEqual(len(obs.ids(axis='observation')), 2)
+ self.assertTrue('None2' in obs.ids(axis='observation'))
+ self.assertTrue('879972' in obs.ids(axis='observation'))
+
+ def test_invalid_input(self):
+ """Correctly raises politically correct error upon invalid input."""
+ with self.assertRaises(ValueError):
+ _subset_table(hdf5_biom=None, json_table_str=self.biom_str1, axis='foo',
+ ids=['f2', 'f4'])
+
+ with self.assertRaises(ValueError):
+ _subset_table(hdf5_biom=None, json_table_str=None, axis='sample', ids=['f2', 'f4'])
+
+ with self.assertRaises(ValueError):
+ _subset_table(json_table_str=self.biom_str1, hdf5_biom='foo',
+ axis='sample', ids=['f2', 'f4'])
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_subset_samples_hdf5(self):
+ """Correctly subsets samples in a hdf5 table"""
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ obs = _subset_table(hdf5_biom='test_data/test.biom', axis='sample',
+ ids=[u'Sample1', u'Sample2', u'Sample3'],
+ json_table_str=None)
+ os.chdir(cwd)
+ obs = obs[0]
+ self.assertEqual(len(obs.ids()), 3)
+ self.assertEqual(len(obs.ids(axis='observation')), 5)
+ self.assertTrue(u'Sample1' in obs.ids())
+ self.assertTrue(u'Sample2' in obs.ids())
+ self.assertTrue(u'Sample3' in obs.ids())
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_subset_observations_hdf5(self):
+ """Correctly subsets samples in a hdf5 table"""
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ obs = _subset_table(hdf5_biom='test_data/test.biom', axis='observation',
+ ids=[u'GG_OTU_1', u'GG_OTU_3', u'GG_OTU_5'],
+ json_table_str=None)
+ os.chdir(cwd)
+ obs = obs[0]
+ self.assertEqual(len(obs.ids()), 4)
+ self.assertEqual(len(obs.ids(axis='observation')), 3)
+ self.assertTrue(u'GG_OTU_1' in obs.ids(axis='observation'))
+ self.assertTrue(u'GG_OTU_3' in obs.ids(axis='observation'))
+ self.assertTrue(u'GG_OTU_5' in obs.ids(axis='observation'))
+
+
+biom1 = ('{"id": "None","format": "Biological Observation Matrix 1.0.0",'
+ '"format_url": "http://biom-format.org","type": "OTU table",'
+ '"generated_by": "QIIME 1.6.0-dev","date": '
+ '"2013-02-09T09:30:11.550590","matrix_type": "sparse",'
+ '"matrix_element_type": "int","shape": [14, 9],"data": '
+ '[[0,0,20],[0,1,18],[0,2,18],[0,3,22],[0,4,4],[1,4,1],[2,0,1],[2,4,1]'
+ ',[2,5,1],[3,6,1],[4,4,1],[5,7,20],[6,4,1],[7,4,1],[7,5,1],[8,4,1],'
+ '[8,6,2],[8,8,3],[9,7,2],[10,5,1],[11,4,9],[11,5,20],[11,6,1],'
+ '[11,8,4],[12,4,3],[12,6,19],[12,8,15],[13,0,1],[13,1,4],[13,2,4]],'
+ '"rows": [{"id": "295053", "metadata": {"taxonomy": ["k__Bacteria"]}}'
+ ',{"id": "42684", "metadata": {"taxonomy": ["k__Bacteria", '
+ '"p__Proteobacteria"]}},{"id": "None11", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None10", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None7", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None6", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None5", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None4", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None3", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None2", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "879972", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None9", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None8", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}}],"columns": [{"id": "f2", "metadata": null},'
+ '{"id": "f1", "metadata": null},{"id": "f3", "metadata": null},'
+ '{"id": "f4", "metadata": null},{"id": "p2", "metadata": null},{"id":'
+ ' "p1", "metadata": null},{"id": "t1", "metadata": null},{"id": '
+ '"not16S.1", "metadata": null},{"id": "t2", "metadata": null}]}')
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_cli/test_summarize_table.py b/tests/test_cli/test_summarize_table.py
new file mode 100644
index 0000000..ff85bc4
--- /dev/null
+++ b/tests/test_cli/test_summarize_table.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from biom.cli.table_summarizer import _summarize_table
+from biom.parse import load_table
+
+import tempfile
+from unittest import TestCase, main
+
+
+class TestSummarizeTable(TestCase):
+
+ def setUp(self):
+ with tempfile.NamedTemporaryFile(mode='w') as fh:
+ fh.write(biom1)
+ fh.flush()
+ self.biom1 = load_table(fh.name)
+
+ def test_default(self):
+ """ TableSummarizer functions as expected
+
+ """
+ result = _summarize_table(self.biom1)
+ # test same alphanumeric content, order of samples is runtime
+ # dependent
+ self.assertEqual(sorted(result), sorted(summary_default))
+
+ def test_qualitative(self):
+ """ TableSummarizer functions as expected with qualitative=True
+
+ """
+ result = _summarize_table(self.biom1, qualitative=True)
+ # test same alphanumeric content, order of samples is runtime
+ # dependent
+ self.assertEqual(sorted(result), sorted(summary_qualitative))
+
+biom1 = ('{"id": "None","format": "Biological Observation Matrix 1.0.0",'
+ '"format_url": "http://biom-format.org","type": "OTU table",'
+ '"generated_by": "QIIME 1.6.0-dev","date": '
+ '"2013-02-09T09:30:11.550590","matrix_type": "sparse",'
+ '"matrix_element_type": "int","shape": [14, 9],"data": [[0,0,20],'
+ '[0,1,18],[0,2,18],[0,3,22],[0,4,4],[1,4,1],[2,0,1],[2,4,1],[2,5,1],'
+ '[3,6,1],[4,4,1],[5,7,20],[6,4,1],[7,4,1],[7,5,1],[8,4,1],[8,6,2],'
+ '[8,8,3],[9,7,2],[10,5,1],[11,4,9],[11,5,20],[11,6,1],[11,8,4],'
+ '[12,4,3],[12,6,19],[12,8,15],[13,0,1],[13,1,4],[13,2,4]],"rows": '
+ '[{"id": "295053", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": '
+ '"42684", "metadata": {"taxonomy": ["k__Bacteria", '
+ '"p__Proteobacteria"]}},{"id": "None11", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None10", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None7", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None6", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None5", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None4", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None3", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None2", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "879972", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}},{"id": "None9", "metadata": {"taxonomy": '
+ '["Unclassified"]}},{"id": "None8", "metadata": {"taxonomy": '
+ '["k__Bacteria"]}}],"columns": [{"id": "f2", "metadata": null},'
+ '{"id": "f1", "metadata": null},{"id": "f3", "metadata": null},'
+ '{"id": "f4", "metadata": null},{"id": "p2", "metadata": null},{"id":'
+ ' "p1", "metadata": null},{"id": "t1", "metadata": null},{"id": '
+ '"not16S.1", "metadata": null},{"id": "t2", "metadata": null}]}')
+
+summary_default = """Num samples: 9
+Num observations: 14
+Total count: 200
+Table density (fraction of non-zero values): 0.238
+
+Counts/sample summary:
+ Min: 22.0
+ Max: 23.0
+ Median: 22.000
+ Mean: 22.222
+ Std. dev.: 0.416
+ Sample Metadata Categories: None provided
+ Observation Metadata Categories: taxonomy
+
+Counts/sample detail:
+p2: 22.0
+f1: 22.0
+f2: 22.0
+f3: 22.0
+f4: 22.0
+t2: 22.0
+not16S.1: 22.0
+t1: 23.0
+p1: 23.0"""
+
+summary_qualitative = """Num samples: 9
+Num observations: 14
+
+Observations/sample summary:
+ Min: 1
+ Max: 9
+ Median: 3.000
+ Mean: 3.333
+ Std. dev.: 2.211
+ Sample Metadata Categories: None provided
+ Observation Metadata Categories: taxonomy
+
+Observations/sample detail:
+f4: 1
+f1: 2
+f3: 2
+not16S.1: 2
+f2: 3
+t2: 3
+t1: 4
+p1: 4
+p2: 9"""
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/test_cli/test_table_converter.py b/tests/test_cli/test_table_converter.py
new file mode 100644
index 0000000..be25e35
--- /dev/null
+++ b/tests/test_cli/test_table_converter.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from os.path import abspath, dirname, join
+import tempfile
+
+import numpy as np
+
+from biom.cli.table_converter import _convert
+from biom.cli.util import write_biom_table
+from biom.parse import MetadataMap, load_table
+from biom.table import Table
+from biom import load_table
+from biom.parse import biom_open, parse_biom_table
+from unittest import TestCase, main
+from io import StringIO
+
+
class TableConverterTests(TestCase):
    """Tests for biom.cli.table_converter._convert round-trips."""

    def setUp(self):
        """Set up data for use in unit tests."""
        self.cmd = _convert
        # NamedTemporaryFile() is used only to reserve a unique path; the
        # converter writes to this path and tearDown removes it.
        self.output_filepath = tempfile.NamedTemporaryFile().name

        with tempfile.NamedTemporaryFile('w') as fh:
            fh.write(biom1)
            fh.flush()
            self.biom_table1 = load_table(fh.name)

        self.biom_lines1 = biom1.split('\n')
        with tempfile.NamedTemporaryFile('w') as fh:
            fh.write(classic1)
            fh.flush()
            self.classic_biom1 = load_table(fh.name)

        self.sample_md1 = MetadataMap.from_file(sample_md1.split('\n'))

        test_data_dir = join(dirname(abspath(__file__)), 'test_data')
        self.json_collapsed_obs = join(test_data_dir,
                                       'json_obs_collapsed.biom')
        self.json_collapsed_samples = join(test_data_dir,
                                           'json_sample_collapsed.biom')

    def tearDown(self):
        """Remove the scratch output file so tests don't leak temp files."""
        import os
        if os.path.exists(self.output_filepath):
            os.remove(self.output_filepath)

    def test_classic_to_biom(self):
        """Correctly converts classic to biom."""
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 to_json=True, table_type='OTU table')

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        self.assertEqual(obs.metadata(), None)
        self.assertNotEqual(obs.metadata(axis='observation'), None)

    def test_classic_to_biom_with_metadata(self):
        """Correctly converts classic to biom with metadata."""
        # No processing of metadata.  The command's return value is not
        # inspected; the result is read back from the output file instead.
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1, to_json=True,
                 table_type='OTU table', process_obs_metadata='naive')

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        self.assertNotEqual(obs.metadata(), None)
        self.assertNotEqual(obs.metadata(axis='observation'), None)
        self.assertEqual(obs.metadata()[obs.index(u'p2', u'sample')],
                         {'foo': 'c;b;a'})
        self.assertEqual(obs.metadata()[obs.index('not16S.1', 'sample')],
                         {'foo': 'b;c;d'})
        self.assertEqual(obs.metadata(axis='observation')[
            obs.index('None11', 'observation')],
            {'taxonomy': 'Unclassified'})

        # With processing of metadata (currently only supports observation md).
        self.cmd(table=self.classic_biom1,
                 output_filepath=self.output_filepath,
                 sample_metadata=self.sample_md1, table_type='OTU table',
                 process_obs_metadata='sc_separated', to_json=True)

        obs = load_table(self.output_filepath)
        self.assertEqual(type(obs), Table)
        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        self.assertNotEqual(obs.metadata(), None)
        self.assertNotEqual(obs.metadata(axis='observation'), None)
        self.assertEqual(obs.metadata()[obs.index('p2', 'sample')],
                         {'foo': 'c;b;a'})
        self.assertEqual(obs.metadata()[obs.index('not16S.1', 'sample')],
                         {'foo': 'b;c;d'})
        self.assertEqual(obs.metadata(axis='observation')[
            obs.index('None11', 'observation')],
            {'taxonomy': ['Unclassified']})

    def test_biom_to_classic1(self):
        """Correctly converts biom to classic."""
        self.cmd(table=self.biom_table1,
                 output_filepath=self.output_filepath,
                 to_tsv=True, header_key='taxonomy')

        self.assertEqual(load_table(self.output_filepath), self.classic_biom1)

    def test_biom_to_classic2(self):
        """Correctly converts biom to classic with metadata renaming."""
        self.cmd(table=self.biom_table1,
                 output_filepath=self.output_filepath, to_tsv=True,
                 header_key='taxonomy', output_metadata_id='foo')
        obs = load_table(self.output_filepath)
        self.assertTrue('foo' in obs.metadata(axis='observation')[0])

    def test_json_to_hdf5_collapsed_samples(self):
        """Correctly converts json to HDF5 changing the sample metadata"""
        with biom_open(self.json_collapsed_samples) as f:
            self.cmd(table=parse_biom_table(f),
                     output_filepath=self.output_filepath, to_hdf5=True,
                     collapsed_samples=True)
        obs = load_table(self.output_filepath)
        exp = Table(np.array([[0., 1.], [6., 6.], [6., 1.],
                              [1., 4.], [0., 2.]]),
                    observation_ids=[u'GG_OTU_1', u'GG_OTU_2', u'GG_OTU_3',
                                     u'GG_OTU_4', u'GG_OTU_5'],
                    sample_ids=[u'skin', u'gut'],
                    observation_metadata=[
                        {u'taxonomy': [u'k__Bacteria', u'p__Proteobacteria',
                                       u'c__Gammaproteobacteria',
                                       u'o__Enterobacteriales',
                                       u'f__Enterobacteriaceae',
                                       u'g__Escherichia', u's__']},
                        {u'taxonomy': [u'k__Bacteria', u'p__Cyanobacteria',
                                       u'c__Nostocophycideae',
                                       u'o__Nostocales', u'f__Nostocaceae',
                                       u'g__Dolichospermum', u's__']},
                        {u'taxonomy': [u'k__Archaea', u'p__Euryarchaeota',
                                       u'c__Methanomicrobia',
                                       u'o__Methanosarcinales',
                                       u'f__Methanosarcinaceae',
                                       u'g__Methanosarcina', u's__']},
                        {u'taxonomy': [u'k__Bacteria', u'p__Firmicutes',
                                       u'c__Clostridia', u'o__Halanaerobiales',
                                       u'f__Halanaerobiaceae',
                                       u'g__Halanaerobium',
                                       u's__Halanaerobiumsaccharolyticum']},
                        {u'taxonomy': [u'k__Bacteria', u'p__Proteobacteria',
                                       u'c__Gammaproteobacteria',
                                       u'o__Enterobacteriales',
                                       u'f__Enterobacteriaceae',
                                       u'g__Escherichia', u's__']}],
                    sample_metadata=[
                        {u'collapsed_ids': [u'Sample4', u'Sample5',
                                            u'Sample6']},
                        {u'collapsed_ids': [u'Sample1', u'Sample2',
                                            u'Sample3']}
                    ],
                    type=u'OTU table')
        self.assertEqual(obs, exp)

    def test_json_to_hdf5_collapsed_metadata(self):
        """Correctly converts json to HDF5 changing the observation metadata"""
        with biom_open(self.json_collapsed_obs) as f:
            t = parse_biom_table(f)
            self.cmd(table=t,
                     output_filepath=self.output_filepath, to_hdf5=True,
                     collapsed_observations=True)
        obs = load_table(self.output_filepath)
        exp = Table(np.array([[2., 1., 1., 0., 0., 1.],
                              [0., 0., 1., 4., 0., 2.],
                              [5., 1., 0., 2., 3., 1.],
                              [0., 1., 2., 0., 0., 0.]]),
                    observation_ids=[u'p__Firmicutes', u'p__Euryarchaeota',
                                     u'p__Cyanobacteria',
                                     u'p__Proteobacteria'],
                    sample_ids=[u'Sample1', u'Sample2', u'Sample3',
                                u'Sample4', u'Sample5', u'Sample6'],
                    observation_metadata=[
                        {u'collapsed_ids': [u'GG_OTU_4']},
                        {u'collapsed_ids': [u'GG_OTU_3']},
                        {u'collapsed_ids': [u'GG_OTU_2']},
                        {u'collapsed_ids': [u'GG_OTU_1', u'GG_OTU_5']}],
                    sample_metadata=[
                        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                         u'BarcodeSequence': u'CGCTTATCGAGA',
                         u'Description': u'human gut',
                         u'BODY_SITE': u'gut'},
                        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                         u'BarcodeSequence': u'CATACCAGTAGC',
                         u'Description': u'human gut',
                         u'BODY_SITE': u'gut'},
                        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                         u'BarcodeSequence': u'CTCTCTACCTGT',
                         u'Description': u'human gut',
                         u'BODY_SITE': u'gut'},
                        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                         u'BarcodeSequence': u'CTCTCGGCCTGT',
                         u'Description': u'human skin',
                         u'BODY_SITE': u'skin'},
                        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                         u'BarcodeSequence': u'CTCTCTACCAAT',
                         u'Description': u'human skin',
                         u'BODY_SITE': u'skin'},
                        {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
                         u'BarcodeSequence': u'CTAACTACCAAT',
                         u'Description': u'human skin',
                         u'BODY_SITE': u'skin'}],
                    type=u'OTU table')

        self.assertEqual(obs, exp)
+
+
+biom1 = """
+{"id": "None",
+ "format": "Biological Observation Matrix 1.0.0",
+ "format_url": "http://biom-format.org",
+ "type": "OTU table",
+ "generated_by": "QIIME 1.6.0-dev",
+ "date": "2013-02-09T09:30:11.550590",
+ "matrix_type": "sparse",
+ "matrix_element_type": "float",
+ "shape": [14, 9],
+ "data": [[0,0,20],[0,1,18],[0,2,18],[0,3,22],[0,4,4],[1,4,1],[2,0,1],
+ [2,4,1],[2,5,1],[3,6,1],[4,4,1],[5,7,20],[6,4,1],[7,4,1],[7,5,1],
+ [8,4,1],[8,6,2],[8,8,3],[9,7,2],[10,5,1],[11,4,9],[11,5,20],
+ [11,6,1],[11,8,4],[12,4,3],[12,6,19],[12,8,15],[13,0,1],[13,1,4],
+ [13,2,4]],
+ "rows": [{"id": "295053",
+ "metadata": {"taxonomy": ["k__Bacteria"]}},
+ {"id": "42684", "metadata": {"taxonomy": ["k__Bacteria",
+ "p__Proteobacteria"]}},
+ {"id": "None11", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "None10", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "None7", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "None6", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "None5", "metadata": {"taxonomy": ["k__Bacteria"]}},
+ {"id": "None4", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "None3", "metadata": {"taxonomy": ["k__Bacteria"]}},
+ {"id": "None2", "metadata": {"taxonomy": ["k__Bacteria"]}},
+ {"id": "None1", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "879972", "metadata": {"taxonomy": ["k__Bacteria"]}},
+ {"id": "None9", "metadata": {"taxonomy": ["Unclassified"]}},
+ {"id": "None8", "metadata": {"taxonomy": ["k__Bacteria"]}}],
+ "columns": [{"id": "f2", "metadata": null},
+ {"id": "f1", "metadata": null},{"id": "f3", "metadata": null},
+ {"id": "f4", "metadata": null},{"id": "p2", "metadata": null},
+ {"id": "p1", "metadata": null},{"id": "t1", "metadata": null},
+ {"id": "not16S.1", "metadata": null},
+ {"id": "t2", "metadata": null}]
+ }"""
+
+classic1 = """# Constructed from biom file
+#OTU ID\tf2\tf1\tf3\tf4\tp2\tp1\tt1\tnot16S.1\tt2\ttaxonomy
+295053\t20.0\t18.0\t18.0\t22.0\t4.0\t0.0\t0.0\t0.0\t0.0\tk__Bacteria
+42684\t0.0\t0.0\t0.0\t0.0\t1.0\t0.0\t0.0\t0.0\t0.0\tk__Bacteria; """ + \
+ """p__Proteobacteria
+None11\t1.0\t0.0\t0.0\t0.0\t1.0\t1.0\t0.0\t0.0\t0.0\tUnclassified
+None10\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t1.0\t0.0\t0.0\tUnclassified
+None7\t0.0\t0.0\t0.0\t0.0\t1.0\t0.0\t0.0\t0.0\t0.0\tUnclassified
+None6\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t20.0\t0.0\tUnclassified
+None5\t0.0\t0.0\t0.0\t0.0\t1.0\t0.0\t0.0\t0.0\t0.0\tk__Bacteria
+None4\t0.0\t0.0\t0.0\t0.0\t1.0\t1.0\t0.0\t0.0\t0.0\tUnclassified
+None3\t0.0\t0.0\t0.0\t0.0\t1.0\t0.0\t2.0\t0.0\t3.0\tk__Bacteria
+None2\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\t2.0\t0.0\tk__Bacteria
+None1\t0.0\t0.0\t0.0\t0.0\t0.0\t1.0\t0.0\t0.0\t0.0\tUnclassified
+879972\t0.0\t0.0\t0.0\t0.0\t9.0\t20.0\t1.0\t0.0\t4.0\tk__Bacteria
+None9\t0.0\t0.0\t0.0\t0.0\t3.0\t0.0\t19.0\t0.0\t15.0\tUnclassified
+None8\t1.0\t4.0\t4.0\t0.0\t0.0\t0.0\t0.0\t0.0\t0.0\tk__Bacteria"""
+
+sample_md1 = """#SampleID\tfoo
+f4\ta;b;c
+not16S.1\tb;c;d
+f2\ta;c;d
+f1\ta;b;c
+p2\tc;b;a
+f3\ta;b;c
+t1\tt;b;c
+p1\tp;b;c
+t2\tt;2;z
+"""
+
+
# Allow running this test module directly (unittest.main entry point).
if __name__ == "__main__":
    main()
diff --git a/tests/test_cli/test_table_normalizer.py b/tests/test_cli/test_table_normalizer.py
new file mode 100755
index 0000000..8e4feaf
--- /dev/null
+++ b/tests/test_cli/test_table_normalizer.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from unittest import TestCase, main
+
+import os
+
+import biom
+from biom.cli.table_normalizer import _normalize_table
+from biom.parse import parse_biom_table
+from biom.util import HAVE_H5PY
+from biom.exception import UnknownAxisError
+
+
class TableNormalizerTests(TestCase):
    """Tests for biom.cli.table_normalizer._normalize_table."""

    def setUp(self):
        """Initialize objects for use in tests."""
        self.cmd = _normalize_table
        # Resolve the fixture relative to this file instead of the original
        # chdir dance: the '/'-based check was non-portable and, if
        # load_table raised, the process was left in the wrong cwd.
        test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'test_data', 'test.json')
        self.table = biom.load_table(test_data)

    def test_bad_inputs(self):
        """Invalid flag combinations and axes are rejected."""
        # relative_abund and presence_absence are mutually exclusive
        with self.assertRaises(ValueError):
            self.cmd(self.table, relative_abund=True,
                     presence_absence=True, axis="sample")
        # at least one normalization type must be requested
        with self.assertRaises(ValueError):
            self.cmd(self.table, relative_abund=False,
                     presence_absence=False, axis="sample")
        # an unknown axis is rejected
        with self.assertRaises(UnknownAxisError):
            self.cmd(self.table, relative_abund=True,
                     axis="nonsense")
+
+
# Allow running this test module directly (unittest.main entry point).
if __name__ == "__main__":
    main()
diff --git a/tests/test_cli/test_uc_processor.py b/tests/test_cli/test_uc_processor.py
new file mode 100644
index 0000000..6c4493d
--- /dev/null
+++ b/tests/test_cli/test_uc_processor.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2015, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+import tempfile
+from unittest import TestCase, main
+
+import numpy as np
+
+import biom
+from biom.cli.uc_processor import _from_uc
+
class TestUcProcessor(TestCase):
    """Unit tests for converting uclust .uc output into BIOM tables."""

    def setUp(self):
        """Split the raw fixtures into line lists, as _from_uc expects."""
        self.cmd = _from_uc
        for attr, raw in (('uc_minimal', uc_minimal),
                          ('uc', uc),
                          ('rep_set', rep_set),
                          ('rep_set_no_mapping', rep_set_no_mapping),
                          ('rep_set_missing_id', rep_set_missing_id)):
            setattr(self, attr, raw.split('\n'))

    def test_basic(self):
        """A single seed record yields a 1x1 table."""
        expected = biom.Table(np.array([[1.0]]),
                              observation_ids=['f2_1539'],
                              sample_ids=['f2'])
        self.assertEqual(self.cmd(self.uc_minimal), expected)

    def test_basic_w_mapping(self):
        """A rep-set mapping renames the observation id to its OTU."""
        expected = biom.Table(np.array([[1.0]]),
                              observation_ids=['otu1'],
                              sample_ids=['f2'])
        self.assertEqual(self.cmd(self.uc_minimal, self.rep_set), expected)

    def test_rep_set_no_mapping(self):
        """A rep set whose headers carry no query labels is rejected."""
        with self.assertRaises(ValueError):
            self.cmd(self.uc_minimal, self.rep_set_no_mapping)

    def test_rep_set_missing_id(self):
        """A rep set lacking the needed query id is rejected."""
        with self.assertRaises(ValueError):
            self.cmd(self.uc_minimal, self.rep_set_missing_id)

    def test_uc(self):
        """Two seeds plus a hit collapse into a 2x2 table."""
        expected = biom.Table(np.array([[1.0, 1.0], [0.0, 1.0]]),
                              observation_ids=['f2_1539', 'f3_1540'],
                              sample_ids=['f2', 'f3'])
        self.assertEqual(self.cmd(self.uc), expected)

    def test_uc_w_mapping(self):
        """Same as test_uc, but with observation ids remapped to OTUs."""
        expected = biom.Table(np.array([[1.0, 1.0], [0.0, 1.0]]),
                              observation_ids=['otu1', 'otu2'],
                              sample_ids=['f2', 'f3'])
        self.assertEqual(self.cmd(self.uc, self.rep_set), expected)
+
+uc_minimal = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+S 0 133 * * * * * f2_1539 *
+"""
+
+uc = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+S 0 133 * * * * * f2_1539 *
+S 0 133 * * * * * f3_1540 *
+H 0 141 100.0 + 0 0 133M8D f3_42 f2_1539
+"""
+
+rep_set = """>otu1 f2_1539
+ACGT
+>otu2 f3_1540
+ACCT
+"""
+
+rep_set_no_mapping = """>otu1
+ACGT
+>otu2
+ACCT
+"""
+
+rep_set_missing_id = """>otu1 f99_1539
+ACGT
+>otu2 f99_1539
+ACCT
+"""
+
# Allow running this test module directly (unittest.main entry point).
if __name__ == '__main__':
    main()
diff --git a/tests/test_cli/test_validate_table.py b/tests/test_cli/test_validate_table.py
new file mode 100644
index 0000000..ab3df4f
--- /dev/null
+++ b/tests/test_cli/test_validate_table.py
@@ -0,0 +1,718 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+__author__ = "Jai Ram Rideout"
+__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
+__credits__ = ["Jai Ram Rideout", "Daniel McDonald",
+ "Jorge Cañardo Alastuey"]
+__license__ = "BSD"
+__url__ = "http://biom-format.org"
+__maintainer__ = "Jai Ram Rideout"
+__email__ = "jai.rideout at gmail.com"
+
+import os
+import json
+from unittest import TestCase, main
+from shutil import copy
+
+import numpy.testing as npt
+
+from biom.cli.table_validator import TableValidator
+from biom.util import HAVE_H5PY
+
+
+if HAVE_H5PY:
+ import h5py
+
+
class TableValidatorTests(TestCase):
    """Tests for biom.cli.table_validator.TableValidator."""

    def setUp(self):
        """Set up data for use in unit tests."""
        self.cmd = TableValidator()
        self.min_sparse_otu = json.loads(min_sparse_otu)
        self.rich_sparse_otu = json.loads(rich_sparse_otu)
        self.rich_dense_otu = json.loads(rich_dense_otu)
        self.min_dense_otu = json.loads(min_dense_otu)
        # Paths created by a test go here; tearDown removes them.
        self.to_remove = []

        cur_path = os.path.split(os.path.abspath(__file__))[0]
        # Strip the two trailing path components (tests/test_cli) to reach
        # the repository root.  os.path.dirname is portable, unlike the
        # previous cur_path.rsplit('/', 2) which assumed POSIX separators.
        examples_path = os.path.join(
            os.path.dirname(os.path.dirname(cur_path)), 'examples')
        self.hdf5_file_valid = os.path.join(examples_path,
                                            'min_sparse_otu_table_hdf5.biom')
        self.hdf5_file_valid_md = os.path.join(examples_path,
                                               ('rich_sparse_otu_table_hdf5'
                                                '.biom'))

    def tearDown(self):
        """Delete any scratch files registered during the test."""
        for f in self.to_remove:
            os.remove(f)

    def _write_json(self, path, obj):
        """Serialize obj to path as JSON and register path for cleanup."""
        with open(path, 'w') as f:
            f.write(json.dumps(obj))
        self.to_remove.append(path)

    # NOTE(review): npt.dec was removed in numpy >= 1.18; on modern numpy
    # these decorators would need unittest.skipUnless instead.
    @npt.dec.skipif(not HAVE_H5PY, msg='H5PY is not installed')
    def test_valid_hdf5_metadata_v210(self):
        """Both example HDF5 tables validate against format 2.1."""
        exp = {'valid_table': True, 'report_lines': []}
        obs = self.cmd(table=self.hdf5_file_valid,
                       format_version='2.1')
        self.assertEqual(obs, exp)
        obs = self.cmd(table=self.hdf5_file_valid_md,
                       format_version='2.1')
        self.assertEqual(obs, exp)

    @npt.dec.skipif(not HAVE_H5PY, msg='H5PY is not installed')
    def test_valid_hdf5_metadata_v200(self):
        pass  # omitting, not a direct way to test at this time using the repo

    @npt.dec.skipif(not HAVE_H5PY, msg='H5PY is not installed')
    def test_valid_hdf5(self):
        """Test a valid HDF5 table"""
        exp = {'valid_table': True,
               'report_lines': []}

        obs = self.cmd(table=self.hdf5_file_valid)
        self.assertEqual(obs, exp)

    @npt.dec.skipif(not HAVE_H5PY, msg='H5PY is not installed')
    def test_invalid_hdf5(self):
        """Test an invalid HDF5 table"""
        exp = {'valid_table': False,
               'report_lines': ["Missing attribute: 'creation-date'"]}

        copy(self.hdf5_file_valid, 'invalid.hdf5')
        self.to_remove.append('invalid.hdf5')

        # Drop a required attribute; the context manager guarantees the
        # handle is closed even if the deletion raises.
        with h5py.File('invalid.hdf5', 'a') as f:
            del f.attrs['creation-date']

        obs = self.cmd(table='invalid.hdf5')
        self.assertEqual(obs, exp)

    def test_valid(self):
        """Correctly validates a table that is indeed... valid."""
        exp = {'valid_table': True, 'report_lines': []}

        self._write_json('valid_test1', self.min_sparse_otu)
        obs = self.cmd(table='valid_test1')
        self.assertEqual(obs, exp)

        self._write_json('valid_test2', self.rich_sparse_otu)
        obs = self.cmd(table='valid_test2')
        self.assertEqual(obs, exp)

        # Soldier, report!!
        self._write_json('valid_test3', self.rich_sparse_otu)
        obs = self.cmd(table='valid_test3', detailed_report=True)
        self.assertTrue(obs['valid_table'])
        self.assertTrue(len(obs['report_lines']) > 0)

    def test_invalid(self):
        """Correctly invalidates a table that is... invalid."""
        del self.min_sparse_otu['date']
        exp = {'valid_table': False, 'report_lines': ["Missing field: 'date'"]}

        self._write_json('invalid_test1', self.min_sparse_otu)
        obs = self.cmd(table='invalid_test1')
        self.assertEqual(obs, exp)

        self.rich_dense_otu['shape'][1] = 42
        exp = {'valid_table': False,
               'report_lines': ['Incorrect number of cols: [0, 0, 1, 0, 0, 0]',
                                "Number of columns in 'columns' is not equal "
                                "to 'shape'"]}

        self._write_json('invalid_test2', self.rich_dense_otu)
        obs = self.cmd(table='invalid_test2')
        self.assertEqual(obs, exp)

    def test_valid_format_url(self):
        """validates format url"""
        table = self.min_sparse_otu

        obs = self.cmd._valid_format_url(table)
        self.assertTrue(len(obs) == 0)

        table['format_url'] = 'foo'
        obs = self.cmd._valid_format_url(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_format(self):
        """Should match format string"""
        table = self.min_sparse_otu

        self.cmd._format_version = '1.0.0'
        obs = self.cmd._valid_format(table)
        self.assertTrue(len(obs) == 0)

        table['format'] = 'foo'
        obs = self.cmd._valid_format(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_type(self):
        """Should be valid table type"""
        table = self.min_sparse_otu

        table['type'] = 'otu table'  # should not be case sensitive
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'Pathway table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'Function table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'Ortholog table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'Gene table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'Metabolite table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'OTU table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'Taxon table'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) == 0)

        table['type'] = 'foo'
        obs = self.cmd._valid_type(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_generated_by(self):
        """Should have some string for generated by"""
        table = self.min_sparse_otu
        obs = self.cmd._valid_generated_by(table)
        self.assertTrue(len(obs) == 0)

        table['generated_by'] = None
        obs = self.cmd._valid_generated_by(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_nullable_id(self):
        """Should just work."""
        pass

    def test_valid_metadata(self):
        """Can be nullable or an object"""
        table = self.min_sparse_otu

        table['rows'][2]['metadata'] = None
        obs = self.cmd._valid_metadata(table['rows'][2])
        self.assertTrue(len(obs) == 0)

        table['rows'][2]['metadata'] = {10: 20}
        obs = self.cmd._valid_metadata(table['rows'][2])
        self.assertTrue(len(obs) == 0)

        table['rows'][2]['metadata'] = ""
        obs = self.cmd._valid_metadata(table['rows'][2])
        self.assertTrue(len(obs) > 0)

        table['rows'][2]['metadata'] = "asdasda"
        obs = self.cmd._valid_metadata(table['rows'][2])
        self.assertTrue(len(obs) > 0)

        table['rows'][2]['metadata'] = [{'a': 'b'}, {'c': 'd'}]
        obs = self.cmd._valid_metadata(table['rows'][2])
        self.assertTrue(len(obs) > 0)

    def test_valid_matrix_type(self):
        """Make sure we have a valid matrix type"""
        obs = self.cmd._valid_matrix_type(self.min_dense_otu)
        self.assertTrue(len(obs) == 0)

        obs = self.cmd._valid_matrix_type(self.min_sparse_otu)
        self.assertTrue(len(obs) == 0)

        table = self.min_dense_otu

        table['matrix_type'] = 'spARSe'
        obs = self.cmd._valid_matrix_type(table)
        self.assertTrue(len(obs) > 0)

        table['matrix_type'] = 'sparse_asdasd'
        obs = self.cmd._valid_matrix_type(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_matrix_element_type(self):
        """Make sure we have a valid matrix type"""
        table = self.min_sparse_otu

        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_element_type'] = u'int'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_element_type'] = 'float'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_element_type'] = u'float'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_element_type'] = 'str'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_element_type'] = u'str'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_element_type'] = 'obj'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) > 0)

        table['matrix_element_type'] = u'asd'
        obs = self.cmd._valid_matrix_element_type(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_datetime(self):
        """Make sure we have a datetime stamp"""
        table = self.min_sparse_otu

        obs = self.cmd._valid_datetime(table)
        self.assertTrue(len(obs) == 0)

        table['date'] = "1999-11-11T10:11:12"
        obs = self.cmd._valid_datetime(table)
        self.assertTrue(len(obs) == 0)

    def test_valid_sparse_data(self):
        """Takes a sparse matrix field and validates"""
        table = self.min_sparse_otu

        obs = self.cmd._valid_sparse_data(table)
        self.assertTrue(len(obs) == 0)

        # incorrect type
        table['matrix_element_type'] = 'float'
        obs = self.cmd._valid_sparse_data(table)
        self.assertTrue(len(obs) > 0)

        # not balanced
        table['matrix_element_type'] = 'int'
        table['data'][5] = [0, 10]
        obs = self.cmd._valid_sparse_data(table)
        self.assertTrue(len(obs) > 0)

        # odd type for index
        table['data'][5] = [1.2, 5, 10]
        obs = self.cmd._valid_sparse_data(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_dense_data(self):
        """Takes a dense matrix field and validates"""
        table = self.min_dense_otu

        obs = self.cmd._valid_dense_data(table)
        self.assertTrue(len(obs) == 0)

        # incorrect type
        table['matrix_element_type'] = 'float'
        obs = self.cmd._valid_dense_data(table)
        self.assertTrue(len(obs) > 0)

        # not balanced
        table['matrix_element_type'] = 'int'
        table['data'][1] = [0, 10]
        obs = self.cmd._valid_dense_data(table)
        self.assertTrue(len(obs) > 0)

        # bad type in a field
        table['data'][1] = [5, 1, 0, 2.3, 3, 1]
        obs = self.cmd._valid_dense_data(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_shape(self):
        """validates shape information"""
        obs = self.cmd._valid_shape(self.min_sparse_otu)
        self.assertTrue(len(obs) == 0)

        obs = self.cmd._valid_shape(self.rich_sparse_otu)
        self.assertTrue(len(obs) == 0)

        bad_shape = self.min_sparse_otu.copy()
        bad_shape['shape'] = ['asd', 10]
        obs = self.cmd._valid_shape(bad_shape)
        self.assertTrue(len(obs) > 0)

    def test_valid_rows(self):
        """validates rows: field"""
        table = self.rich_dense_otu

        obs = self.cmd._valid_rows(table)
        self.assertTrue(len(obs) == 0)

        table['rows'][0]['id'] = ""
        obs = self.cmd._valid_rows(table)
        self.assertTrue(len(obs) > 0)

        table['rows'][0]['id'] = None
        obs = self.cmd._valid_rows(table)
        self.assertTrue(len(obs) > 0)

        del table['rows'][0]['id']
        obs = self.cmd._valid_rows(table)
        self.assertTrue(len(obs) > 0)

        table['rows'][0]['id'] = 'asd'
        table['rows'][0]['metadata'] = None
        obs = self.cmd._valid_rows(table)
        self.assertTrue(len(obs) == 0)

        # since this is an OTU table, metadata is a required key
        del table['rows'][0]['metadata']
        obs = self.cmd._valid_rows(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_columns(self):
        """validates table:columns: fields"""
        table = self.rich_dense_otu

        obs = self.cmd._valid_columns(table)
        self.assertTrue(len(obs) == 0)

        table['columns'][0]['id'] = ""
        obs = self.cmd._valid_columns(table)
        self.assertTrue(len(obs) > 0)

        table['columns'][0]['id'] = None
        obs = self.cmd._valid_columns(table)
        self.assertTrue(len(obs) > 0)

        del table['columns'][0]['id']
        obs = self.cmd._valid_columns(table)
        self.assertTrue(len(obs) > 0)

        table['columns'][0]['id'] = 'asd'
        table['columns'][0]['metadata'] = None
        obs = self.cmd._valid_columns(table)
        self.assertTrue(len(obs) == 0)

        # since this is an OTU table, metadata is a required key
        del table['columns'][0]['metadata']
        obs = self.cmd._valid_columns(table)
        self.assertTrue(len(obs) > 0)

    def test_valid_data(self):
        """validates data: fields"""
        # the burden of validating data is passed on to valid_sparse_data
        # and valid_dense_data
        table = self.rich_sparse_otu

        obs = self.cmd._valid_data(table)
        self.assertTrue(len(obs) == 0)

        table['matrix_type'] = 'foo'
        obs = self.cmd._valid_data(table)
        self.assertTrue(len(obs) > 0)
+
+
+rich_sparse_otu = """{
+"id":null,
+"format": "1.0.0",
+"format_url": "http://biom-format.org",
+"type": "OTU table",
+"generated_by": "QIIME revision XYZ",
+"date": "2011-12-19T19:00:00",
+"rows":[{"id":"GG_OTU_1",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Proteobacteria",
+ "c__Gammaproteobacteria",
+ "o__Enterobacteriales",
+ "f__Enterobacteriaceae",
+ "g__Escherichia",
+ "s__"]}},
+ {"id":"GG_OTU_2",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Cyanobacteria",
+ "c__Nostocophycideae",
+ "o__Nostocales",
+ "f__Nostocaceae",
+ "g__Dolichospermum",
+ "s__"]}},
+ {"id":"GG_OTU_3",
+ "metadata":{"taxonomy":["k__Archaea",
+ "p__Euryarchaeota",
+ "c__Methanomicrobia",
+ "o__Methanosarcinales",
+ "f__Methanosarcinaceae",
+ "g__Methanosarcina",
+ "s__"]}},
+ {"id":"GG_OTU_4",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Firmicutes",
+ "c__Clostridia",
+ "o__Halanaerobiales",
+ "f__Halanaerobiaceae",
+ "g__Halanaerobium",
+ "s__Halanaerobiumsaccharolyticum"]}},
+ {"id":"GG_OTU_5",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Proteobacteria",
+ "c__Gammaproteobacteria",
+ "o__Enterobacteriales",
+ "f__Enterobacteriaceae",
+ "g__Escherichia",
+ "s__"]}}
+ ],
+ "columns":[
+ {"id":"Sample1", "metadata":{
+ "BarcodeSequence":"CGCTTATCGAGA",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample2", "metadata":{
+ "BarcodeSequence":"CATACCAGTAGC",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample3", "metadata":{
+ "BarcodeSequence":"CTCTCTACCTGT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample4", "metadata":{
+ "BarcodeSequence":"CTCTCGGCCTGT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"skin",
+ "Description":"human skin"}},
+ {"id":"Sample5", "metadata":{
+ "BarcodeSequence":"CTCTCTACCAAT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"skin",
+ "Description":"human skin"}},
+ {"id":"Sample6", "metadata":{
+ "BarcodeSequence":"CTAACTACCAAT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"skin",
+ "Description":"human skin"}}
+ ],
+ "matrix_type": "sparse",
+ "matrix_element_type": "int",
+ "shape": [5, 6],
+ "data":[[0,2,1],
+ [1,0,5],
+ [1,1,1],
+ [1,3,2],
+ [1,4,3],
+ [1,5,1],
+ [2,2,1],
+ [2,3,4],
+ [2,5,2],
+ [3,0,2],
+ [3,1,1],
+ [3,2,1],
+ [3,5,1],
+ [4,1,1],
+ [4,2,1]
+ ]
+ }"""
+
+min_sparse_otu = """{
+ "id":null,
+ "format": "1.0.0",
+ "format_url": "http://biom-format.org",
+ "type": "OTU table",
+ "generated_by": "QIIME revision XYZ",
+ "date": "2011-12-19T19:00:00",
+ "rows":[
+ {"id":"GG_OTU_1", "metadata":null},
+ {"id":"GG_OTU_2", "metadata":null},
+ {"id":"GG_OTU_3", "metadata":null},
+ {"id":"GG_OTU_4", "metadata":null},
+ {"id":"GG_OTU_5", "metadata":null}
+ ],
+ "columns": [
+ {"id":"Sample1", "metadata":null},
+ {"id":"Sample2", "metadata":null},
+ {"id":"Sample3", "metadata":null},
+ {"id":"Sample4", "metadata":null},
+ {"id":"Sample5", "metadata":null},
+ {"id":"Sample6", "metadata":null}
+ ],
+ "matrix_type": "sparse",
+ "matrix_element_type": "int",
+ "shape": [5, 6],
+ "data":[[0,2,1],
+ [1,0,5],
+ [1,1,1],
+ [1,3,2],
+ [1,4,3],
+ [1,5,1],
+ [2,2,1],
+ [2,3,4],
+ [2,5,2],
+ [3,0,2],
+ [3,1,1],
+ [3,2,1],
+ [3,5,1],
+ [4,1,1],
+ [4,2,1]
+ ]
+ }"""
+
+rich_dense_otu = """{
+ "id":null,
+ "format": "1.0.0",
+ "format_url": "http://biom-format.org",
+ "type": "OTU table",
+ "generated_by": "QIIME revision XYZ",
+ "date": "2011-12-19T19:00:00",
+ "rows":[{"id":"GG_OTU_1",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Proteobacteria",
+ "c__Gammaproteobacteria",
+ "o__Enterobacteriales",
+ "f__Enterobacteriaceae",
+ "g__Escherichia",
+ "s__"]}},
+ {"id":"GG_OTU_2",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Cyanobacteria",
+ "c__Nostocophycideae",
+ "o__Nostocales",
+ "f__Nostocaceae",
+ "g__Dolichospermum",
+ "s__"]}},
+ {"id":"GG_OTU_3",
+ "metadata":{"taxonomy":["k__Archaea",
+ "p__Euryarchaeota",
+ "c__Methanomicrobia",
+ "o__Methanosarcinales",
+ "f__Methanosarcinaceae",
+ "g__Methanosarcina",
+ "s__"]}},
+ {"id":"GG_OTU_4",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Firmicutes",
+ "c__Clostridia",
+ "o__Halanaerobiales",
+ "f__Halanaerobiaceae",
+ "g__Halanaerobium",
+ "s__Halanaerobiumsaccharolyticum"]}},
+ {"id":"GG_OTU_5",
+ "metadata":{"taxonomy":["k__Bacteria",
+ "p__Proteobacteria",
+ "c__Gammaproteobacteria",
+ "o__Enterobacteriales",
+ "f__Enterobacteriaceae",
+ "g__Escherichia",
+ "s__"]}}
+ ],
+ "columns":[
+ {"id":"Sample1", "metadata":{
+ "BarcodeSequence":"CGCTTATCGAGA",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample2", "metadata":{
+ "BarcodeSequence":"CATACCAGTAGC",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample3", "metadata":{
+ "BarcodeSequence":"CTCTCTACCTGT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample4", "metadata":{
+ "BarcodeSequence":"CTCTCGGCCTGT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"skin",
+ "Description":"human skin"}},
+ {"id":"Sample5", "metadata":{
+ "BarcodeSequence":"CTCTCTACCAAT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"skin",
+ "Description":"human skin"}},
+ {"id":"Sample6", "metadata":{
+ "BarcodeSequence":"CTAACTACCAAT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"skin",
+ "Description":"human skin"}}
+ ],
+ "matrix_type": "dense",
+ "matrix_element_type": "int",
+ "shape": [5,6],
+ "data": [[0,0,1,0,0,0],
+ [5,1,0,2,3,1],
+ [0,0,1,4,2,0],
+ [2,1,1,0,0,1],
+ [0,1,1,0,0,0]]
+ }"""
+
+min_dense_otu = """ {
+ "id":null,
+ "format": "1.0.0",
+ "format_url": "http://biom-format.org",
+ "type": "OTU table",
+ "generated_by": "QIIME revision XYZ",
+ "date": "2011-12-19T19:00:00",
+ "rows":[
+ {"id":"GG_OTU_1", "metadata":null},
+ {"id":"GG_OTU_2", "metadata":null},
+ {"id":"GG_OTU_3", "metadata":null},
+ {"id":"GG_OTU_4", "metadata":null},
+ {"id":"GG_OTU_5", "metadata":null}
+ ],
+ "columns": [
+ {"id":"Sample1", "metadata":null},
+ {"id":"Sample2", "metadata":null},
+ {"id":"Sample3", "metadata":null},
+ {"id":"Sample4", "metadata":null},
+ {"id":"Sample5", "metadata":null},
+ {"id":"Sample6", "metadata":null}
+ ],
+ "matrix_type": "dense",
+ "matrix_element_type": "int",
+ "shape": [5,6],
+ "data": [[0,0,1,0,0,0],
+ [5,1,0,2,3,1],
+ [0,0,1,4,2,0],
+ [2,1,1,0,0,1],
+ [0,1,1,0,0,0]]
+ }"""
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/test_err.py b/tests/test_err.py
new file mode 100644
index 0000000..f21e994
--- /dev/null
+++ b/tests/test_err.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from unittest import TestCase, main
+from copy import deepcopy
+
+from biom import example_table, Table
+from biom.exception import TableException
+from biom.err import (_test_empty, _test_obssize, _test_sampsize, _test_obsdup,
+ _test_sampdup, _test_obsmdsize, _test_sampmdsize,
+ errstate, geterr, seterr, geterrcall, seterrcall,
+ errcheck, errstate, __errprof)
+
+
+runtime_ep = __errprof
+runtime_ep_profile = deepcopy(runtime_ep._profile)
+runtime_ep_state = runtime_ep._state.copy()
+runtime_ep_test = runtime_ep._test.copy()
+
+
+class ErrModeTests(TestCase):
+ def setUp(self):
+ self.ex_table = example_table.copy()
+
+ def test_test_empty(self):
+ self.assertTrue(_test_empty(Table([], [], [])))
+ self.assertFalse(_test_empty(self.ex_table))
+
+ def test_test_obssize(self):
+ self.assertFalse(_test_obssize(self.ex_table))
+ self.ex_table._observation_ids = self.ex_table._observation_ids[:-1]
+ self.assertTrue(_test_obssize(self.ex_table))
+
+ def test_test_sampsize(self):
+ self.assertFalse(_test_sampsize(self.ex_table))
+ self.ex_table._sample_ids = self.ex_table._sample_ids[:-1]
+ self.assertTrue(_test_sampsize(self.ex_table))
+
+ def test_test_obsdup(self):
+ self.assertFalse(_test_obsdup(self.ex_table))
+ self.ex_table._observation_ids[0] = self.ex_table._observation_ids[1]
+ self.assertTrue(_test_obsdup(self.ex_table))
+
+ def test_test_sampdup(self):
+ self.assertFalse(_test_sampdup(self.ex_table))
+ self.ex_table._sample_ids[0] = self.ex_table._sample_ids[1]
+ self.assertTrue(_test_sampdup(self.ex_table))
+
+ def test_test_obsmdsize(self):
+ self.assertFalse(_test_obsdup(self.ex_table))
+ self.ex_table._observation_metadata = \
+ self.ex_table._observation_metadata[:-1]
+ self.assertTrue(_test_obsmdsize(self.ex_table))
+
+ def test_test_sampmdsize(self):
+ self.assertFalse(_test_sampdup(self.ex_table))
+ self.ex_table._sample_metadata = \
+ self.ex_table._sample_metadata[:-1]
+ self.assertTrue(_test_sampmdsize(self.ex_table))
+
+
+class ErrorProfileTests(TestCase):
+ def setUp(self):
+ self.ex_table = example_table.copy()
+ self.ep = runtime_ep
+ self.ep.state = {'all': 'raise'}
+
+ def tearDown(self):
+ self.ep._profile = deepcopy(runtime_ep_profile.copy())
+ self.ep._state = runtime_ep_state.copy()
+ self.ep._test = runtime_ep_test.copy()
+
+ def test_test(self):
+ self.ep.test(self.ex_table)
+ self.ep.test(self.ex_table, 'empty')
+ self.ep.test(self.ex_table, 'empty', 'obssize')
+
+ self.ex_table._observation_ids = self.ex_table._observation_ids[:-1]
+ self.ep.test(self.ex_table, 'empty')
+ self.assertTrue(isinstance(self.ep.test(self.ex_table, 'obssize'),
+ TableException))
+
+ def test_state(self):
+ self.ep.state = {'all': 'ignore'}
+ self.assertEqual(set(self.ep._state.values()), set(['ignore']))
+ self.ep.state = {'empty': 'call'}
+ self.assertEqual(set(self.ep._state.values()), set(['ignore', 'call']))
+ self.assertEqual(self.ep.state['empty'], 'call')
+
+ with self.assertRaises(KeyError):
+ self.ep.state = {'empty': 'missing'}
+
+ with self.assertRaises(KeyError):
+ self.ep.state = {'emptyasdasd': 'ignore'}
+
+ def test_contains(self):
+ self.assertTrue('empty' in self.ep)
+ self.assertFalse('emptyfoo' in self.ep)
+
+ def test_handle_error(self):
+ def callback(foo):
+ return 10
+
+ self.ep.setcall('empty', callback)
+
+ self.assertTrue(isinstance(self.ep._handle_error('empty', None),
+ TableException))
+ self.ep.state = {'empty': 'call'}
+ self.assertEqual(self.ep._handle_error('empty', None), 10)
+
+ def test_setcall(self):
+ def callback(foo):
+ return 10
+
+ self.assertEqual(self.ep._profile['empty']['call'](None), None)
+ self.ep.setcall('empty', callback)
+ self.assertEqual(self.ep._profile['empty']['call'](None), 10)
+
+ with self.assertRaises(KeyError):
+ self.ep.setcall('emptyfoo', callback)
+
+ def test_getcall(self):
+ def callback(foo):
+ return 10
+ self.ep.setcall('empty', callback)
+ self.assertEqual(self.ep.getcall('empty'), callback)
+
+ with self.assertRaises(KeyError):
+ self.ep.getcall('emptyfoo')
+
+ def test_register_unregister(self):
+ cb = lambda x: 123
+ test = lambda x: x == 5
+
+ self.ep.register('foo', 'bar', 'ignore', test, callback=cb)
+ self.assertTrue('foo' in self.ep)
+ self.ep.state = {'foo': 'call'}
+ self.assertEqual(self.ep._handle_error('foo', None), 123)
+
+ foo_prof = self.ep._profile['foo'].copy()
+ prof, func, state = self.ep.unregister('foo')
+
+ self.assertEqual(func, test)
+ self.assertEqual(state, 'call')
+ self.assertEqual(prof, foo_prof)
+
+ with self.assertRaises(KeyError):
+ self.ep.register('empty', 1, 2, lambda: None)
+
+ with self.assertRaises(KeyError):
+ self.ep.register('foo', 'missing', 2, lambda: None)
+
+ with self.assertRaises(KeyError):
+ self.ep.unregister('non_existant')
+
+
+class SupportTests(TestCase):
+ def setUp(self):
+ self.ex_table = example_table.copy()
+
+ def test_geterr(self):
+ state = geterr()
+ self.assertEqual(state, runtime_ep._state)
+ old = seterr(all='call')
+ self.assertNotEqual(geterr(), state)
+ seterr(**old)
+
+ def test_seterr(self):
+ existing = seterr(empty='warn')
+ self.assertEqual(runtime_ep._state['empty'], 'warn')
+ self.assertNotEqual(runtime_ep._state['empty'], existing)
+ seterr(empty=existing['empty'])
+ self.assertNotEqual(runtime_ep._state['empty'], 'warn')
+ self.assertEqual(runtime_ep._state, existing)
+
+ def test_geterrcall(self):
+ exp = runtime_ep._profile['sampsize']['call']
+ obs = geterrcall('sampsize')
+ self.assertEqual(obs, exp)
+
+ with self.assertRaises(KeyError):
+ geterrcall('asdasd')
+
+ def test_seterrcall(self):
+ def foo(x):
+ return 10
+
+ seterrcall('sampmdsize', foo)
+ obs = geterrcall('sampmdsize')
+ self.assertEqual(obs, foo)
+
+ def test_errcheck(self):
+ self.assertEqual(errcheck(self.ex_table), None)
+ self.ex_table._sample_ids = self.ex_table._sample_ids[:-1]
+ with self.assertRaises(TableException):
+ errcheck(self.ex_table)
+
+ def test_errstate(self):
+ def foo(item):
+ return "the callback called"
+
+ table = Table([], [], [])
+ seterrcall('empty', foo)
+ self.assertNotEqual(geterr()['empty'], 'call')
+ with errstate(empty='call'):
+ result = errcheck(table)
+ self.assertEqual(result, "the callback called")
+ self.assertNotEqual(geterr()['empty'], 'call')
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..5be8d62
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,1692 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+import os
+from io import StringIO
+import json
+from unittest import TestCase, main
+
+import numpy as np
+import numpy.testing as npt
+
+from biom.parse import generatedby, MetadataMap, parse_biom_table, parse_uc
+from biom.table import Table
+from biom.util import HAVE_H5PY, __version__
+if HAVE_H5PY:
+ import h5py
+
+__author__ = "Justin Kuczynski"
+__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
+__credits__ = ["Justin Kuczynski", "Daniel McDonald", "Adam Robbins-Pianka",
+ "Jose Antonio Navas Molina"]
+__license__ = "BSD"
+__url__ = "http://biom-format.org"
+__maintainer__ = "Justin Kuczynski"
+__email__ = "justinak at gmail.com"
+
+
+class ParseTests(TestCase):
+
+ """Tests of parse functions"""
+
+ def setUp(self):
+ """define some top-level data"""
+ self.legacy_otu_table1 = legacy_otu_table1
+ self.otu_table1 = otu_table1
+ self.otu_table1_floats = otu_table1_floats
+ self.files_to_remove = []
+ self.biom_minimal_sparse = biom_minimal_sparse
+
+ self.classic_otu_table1_w_tax = classic_otu_table1_w_tax.split('\n')
+ self.classic_otu_table1_no_tax = classic_otu_table1_no_tax.split('\n')
+ self.classic_table_with_complex_metadata = \
+ classic_table_with_complex_metadata.split('\n')
+
+ def test_generatedby(self):
+ """get a generatedby string"""
+ exp = "BIOM-Format %s" % __version__
+ obs = generatedby()
+ self.assertEqual(obs, exp)
+
+ def test_metadata_map(self):
+ """MetadataMap functions as expected
+
+ This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
+ project, but we obtained permission from the authors of this method to
+ port it to the BIOM Format project (and keep it under BIOM's BSD
+ license).
+ """
+ s1 = ['#sample\ta\tb', '#comment line to skip',
+ 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
+ exp = ([['x', 'y', 'z'], ['i', 'j', 'k']],
+ ['sample', 'a', 'b'],
+ ['comment line to skip', 'more skip'])
+ exp = {'x': {'a': 'y', 'b': 'z'}, 'i': {'a': 'j', 'b': 'k'}}
+ obs = MetadataMap.from_file(s1)
+ self.assertEqual(obs, exp)
+
+ # check that we strip double quotes by default
+ s2 = ['#sample\ta\tb', '#comment line to skip',
+ '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk']
+ obs = MetadataMap.from_file(s2)
+ self.assertEqual(obs, exp)
+
+ def test_metadata_map_w_map_fs(self):
+ """MetadataMap functions as expected w process_fns
+
+ This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
+ project, but we obtained permission from the authors of this method to
+ port it to the BIOM Format project (and keep it under BIOM's BSD
+ license).
+ """
+ s1 = ['#sample\ta\tb', '#comment line to skip',
+ 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
+ exp = ([['x', 'y', 'z'], ['i', 'j', 'k']],
+ ['sample', 'a', 'b'],
+ ['comment line to skip', 'more skip'])
+ exp = {'x': {'a': 'y', 'b': 'zzz'}, 'i': {'a': 'j', 'b': 'kkk'}}
+ process_fns = {'b': lambda x: x * 3}
+ obs = MetadataMap.from_file(s1, process_fns=process_fns)
+ self.assertEqual(obs, exp)
+
+ def test_metadata_map_w_header(self):
+ """MetadataMap functions as expected w user-provided header
+
+ This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
+ project, but we obtained permission from the authors of this method to
+ port it to the BIOM Format project (and keep it under BIOM's BSD
+ license).
+ """
+ # number of user-provided headers matches number of columns, and no
+ # header line in file
+ s1 = ['#comment line to skip',
+ 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
+ exp = ([['x', 'y', 'z'], ['i', 'j', 'k']],
+ ['sample', 'a', 'b'],
+ ['comment line to skip', 'more skip'])
+ exp = {'x': {'a': 'y', 'b': 'z'}, 'i': {'a': 'j', 'b': 'k'}}
+ header = ['sample', 'a', 'b']
+ obs = MetadataMap.from_file(s1, header=header)
+ self.assertEqual(obs, exp)
+
+ # number of user-provided headers is fewer than number of columns, and
+ # no header line in file
+ s1 = ['#comment line to skip',
+ 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
+ exp = ([['x', 'y', 'z'], ['i', 'j', 'k']],
+ ['sample', 'a'],
+ ['comment line to skip', 'more skip'])
+ exp = {'x': {'a': 'y'}, 'i': {'a': 'j'}}
+ header = ['sample', 'a']
+ obs = MetadataMap.from_file(s1, header=header)
+ self.assertEqual(obs, exp)
+
+ # number of user-provided headers is fewer than number of columns, and
+ # header line in file (overridden by user-provided)
+ s1 = ['#sample\ta\tb', '#comment line to skip',
+ 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
+ exp = ([['x', 'y', 'z'], ['i', 'j', 'k']],
+ ['sample', 'a'],
+ ['comment line to skip', 'more skip'])
+ exp = {'x': {'a': 'y'}, 'i': {'a': 'j'}}
+ header = ['sample', 'a']
+ obs = MetadataMap.from_file(s1, header=header)
+ self.assertEqual(obs, exp)
+
+ def test_parse_biom_json(self):
+ """test the biom otu table parser"""
+ # light test. this code is used thoroughly within the other
+ # parse_biom_table methods
+ tab1_fh = json.load(StringIO(self.biom_minimal_sparse))
+ tab = Table.from_json(tab1_fh)
+ npt.assert_equal(tab.ids(), ('Sample1', 'Sample2', 'Sample3',
+ 'Sample4', 'Sample5', 'Sample6'))
+ npt.assert_equal(tab.ids(axis='observation'),
+ ('GG_OTU_1', 'GG_OTU_2', 'GG_OTU_3',
+ 'GG_OTU_4', 'GG_OTU_5'))
+ self.assertEqual(tab.metadata(), None)
+ self.assertEqual(tab.metadata(axis='observation'), None)
+
+ tab = parse_biom_table(StringIO(self.biom_minimal_sparse))
+ npt.assert_equal(tab.ids(), ('Sample1', 'Sample2', 'Sample3',
+ 'Sample4', 'Sample5', 'Sample6'))
+ npt.assert_equal(tab.ids(axis='observation'),
+ ('GG_OTU_1', 'GG_OTU_2', 'GG_OTU_3',
+ 'GG_OTU_4', 'GG_OTU_5'))
+ self.assertEqual(tab.metadata(), None)
+ self.assertEqual(tab.metadata(axis='observation'), None)
+
+ tablestring = u'''{
+ "id":null,
+ "format": "Biological Observation Matrix 0.9.1-dev",
+ "format_url": "http://biom-format.org",
+ "type": "OTU table",
+ "generated_by": "QIIME revision XYZ",
+ "date": "2011-12-19T19:00:00",
+ "rows":[
+ {"id":"GG_OTU_1", "metadata":null},
+ {"id":"GG_OTU_2", "metadata":null},
+ {"id":"GG_OTU_3", "metadata":null},
+ {"id":"GG_OTU_4", "metadata":null},
+ {"id":"GG_OTU_5", "metadata":null}
+ ],
+ "columns": [
+ {"id":"Sample1", "metadata":null},
+ {"id":"Sample2", "metadata":null},
+ {"id":"Sample3", "metadata":null},
+ {"id":"Sample4", "metadata":null},
+ {"id":"Sample5", "metadata":null},
+ {"id":"Sample6", "metadata":null}
+ ],
+ "matrix_type": "dense",
+ "matrix_element_type": "int",
+ "shape": [5,6],
+ "data": [[0,0,1,0,0,0],
+ [5,1,0,2,3,1],
+ [0,0,1,4,2,0],
+ [2,1,1,0,0,1],
+ [0,1,1,0,0,0]]
+ }'''
+ tbs_fh = json.load(StringIO(tablestring))
+ tab1 = Table.from_json(tbs_fh)
+ tab2 = parse_biom_table(tablestring)
+ self.assertEqual(tab1, tab2)
+
+ def test_parse_biom_table_subset(self):
+ """test the biom table parser subsetting"""
+ tab = parse_biom_table(StringIO(self.biom_minimal_sparse),
+ ids=['Sample1', 'Sample3', 'Sample5',
+ 'Sample6'])
+ npt.assert_equal(tab.ids(), ('Sample1', 'Sample3', 'Sample5',
+ 'Sample6'))
+ npt.assert_equal(tab.ids(axis='observation'),
+ ('GG_OTU_1', 'GG_OTU_2', 'GG_OTU_3', 'GG_OTU_4',
+ 'GG_OTU_5'))
+ self.assertEqual(tab.metadata(), None)
+ self.assertEqual(tab.metadata(axis='observation'), None)
+
+ tab = parse_biom_table(StringIO(self.biom_minimal_sparse),
+ ids=['GG_OTU_2', 'GG_OTU_3', 'GG_OTU_5'],
+ axis='observation')
+ npt.assert_equal(tab.ids(), ('Sample1', 'Sample2', 'Sample3',
+ 'Sample4', 'Sample5', 'Sample6',))
+ npt.assert_equal(tab.ids(axis='observation'),
+ ('GG_OTU_2', 'GG_OTU_3', 'GG_OTU_5'))
+ self.assertEqual(tab.metadata(), None)
+ self.assertEqual(tab.metadata(axis='observation'), None)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_parse_biom_table_hdf5(self):
+ """Make sure we can parse a HDF5 table through the same loader"""
+ cwd = os.getcwd()
+ if '/' in __file__[1:]:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ Table.from_hdf5(h5py.File('test_data/test.biom'))
+ os.chdir(cwd)
+
+ def test_parse_biom_table(self):
+ """tests for parse_biom_table when we do not have h5py"""
+ # This is a TSV as a list of lines
+ t = parse_biom_table(self.classic_otu_table1_no_tax)
+
+ # Test TSV as a list of lines
+ t_tsv_str = t.to_tsv()
+ t_tsv_lines = t_tsv_str.splitlines()
+ t_tsv = parse_biom_table(t_tsv_lines)
+ self.assertEqual(t, t_tsv)
+ # Test TSV as a file-like object
+ t_tsv_stringio = StringIO(t_tsv_str)
+ t_tsv = parse_biom_table(t_tsv_stringio)
+ self.assertEqual(t, t_tsv)
+
+ # Test JSON as a list of lines
+ t_json_str = t.to_json('asd')
+ t_json_lines = t_json_str.splitlines()
+ t_json = parse_biom_table(t_json_lines)
+ self.assertEqual(t, t_json)
+ # Test JSON as a file-like object
+ t_json_str = t.to_json('asd')
+ t_json_stringio = StringIO(t_json_str)
+ t_json = parse_biom_table(t_json_stringio)
+ self.assertEqual(t, t_json)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_parse_biom_table_with_hdf5(self):
+ """tests for parse_biom_table when we have h5py"""
+ # We will round-trip the HDF5 file to several different formats, and
+ # make sure we can recover the same table using parse_biom_table
+ cwd = os.getcwd()
+ if '/' in __file__[1:]:
+ os.chdir(__file__.rsplit('/', 1)[0])
+
+ t = parse_biom_table(h5py.File('test_data/test.biom'))
+
+ # These things are not round-trippable using the general-purpose
+ # parse_biom_table function
+ t._sample_metadata = None
+ t._observation_metadata = None
+ t.type = None
+
+ # Test TSV as a list of lines
+ t_tsv_str = t.to_tsv()
+ t_tsv_lines = t_tsv_str.splitlines()
+ t_tsv = parse_biom_table(t_tsv_lines)
+ self.assertEqual(t, t_tsv)
+ # Test TSV as a file-like object
+ t_tsv_stringio = StringIO(t_tsv_str)
+ t_tsv = parse_biom_table(t_tsv_stringio)
+ self.assertEqual(t, t_tsv)
+
+ # Test JSON as a list of lines
+ t_json_str = t.to_json('asd')
+ t_json_lines = t_json_str.splitlines()
+ t_json = parse_biom_table(t_json_lines)
+ self.assertEqual(t, t_json)
+ # Test JSON as a file-like object
+ t_json_str = t.to_json('asd')
+ t_json_stringio = StringIO(t_json_str)
+ t_json = parse_biom_table(t_json_stringio)
+ self.assertEqual(t, t_json)
+
+
+legacy_otu_table1 = """# some comment goes here
+#OTU ID Fing Key NA Consensus Lineage
+0 19111 44536 42 Bacteria; Actinobacteria; Actinobacteridae; Propioniba\
+cterineae; Propionibacterium
+
+1 1216 3500 6 Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; La\
+ctobacillales; Lactobacillales; Streptococcaceae; Streptococcus
+7 1803 1184 2 Bacteria; Actinobacteria; Actinobacteridae; Gordoniace\
+ae; Corynebacteriaceae
+3 1722 4903 17 Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; St\
+aphylococcaceae
+4 589 2074 34 Bacteria; Cyanobacteria; Chloroplasts; vectors
+"""
+
+otu_table1 = """# Some comment
+
+OTU ID Fing Key NA Consensus Lineage
+0 19111 44536 42 Bacteria; Actinobacteria; Actinobacteridae; Propioniba\
+cterineae; Propionibacterium
+# some other comment
+1 1216 3500 6 Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; La\
+ctobacillales; Lactobacillales; Streptococcaceae; Streptococcus
+7 1803 1184 2 Bacteria; Actinobacteria; Actinobacteridae; Gordoniace\
+ae; Corynebacteriaceae
+# comments
+# everywhere!
+3 1722 4903 17 Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; St\
+aphylococcaceae
+4 589 2074 34 Bacteria; Cyanobacteria; Chloroplasts; vectors
+"""
+
+otu_table1_floats = """# Some comment
+
+OTU ID Fing Key NA Consensus Lineage
+0 19111.0 44536.0 42.0 Bacteria; Actinobacteria; Actinobacteridae; Propio\
+nibacterineae; Propionibacterium
+# some other comment
+1 1216.0 3500.0 6.0 Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; La\
+ctobacillales; Lactobacillales; Streptococcaceae; Streptococcus
+7 1803.0 1184.0 2.0 Bacteria; Actinobacteria; Actinobacteridae; Gordoniace\
+ae; Corynebacteriaceae
+# comments
+# everywhere!
+3 1722.1 4903.2 17 Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; St\
+aphylococcaceae
+4 589.6 2074.4 34.5 Bacteria; Cyanobacteria; Chloroplasts; vectors
+"""
+
+classic_table_with_complex_metadata = """# some comment
+#OTU ID sample1 sample2 KEGG_Pathways
+K05842 1.0 3.5 rank1A; rank2A|rank1B; rank2B
+K05841 2.0 4.5 Environmental Information Processing;
+K00508 0.0 0.0 Metabolism; Lipid Metabolism; Linoleic acid metabolism
+K00500 0.5 0.5 Metabolism; Amino Acid Metabolism; Phenylalanine metabolism|Me\
+tabolism; Amino Acid Metabolism; Phenylalanine, tyrosine and tryptophan biosyn\
+thesis
+K00507 0.0 0.0 Metabolism; Lipid Metabolism; Biosynthesis of unsaturated fatt\
+y acids|Organismal Systems; Endocrine System; PPAR signaling pathway
+"""
+
+biom_minimal_sparse = u"""
+ {
+ "id":null,
+ "format": "Biological Observation Matrix v0.9",
+ "format_url": "http://some_website/QIIME_MGRAST_dataformat_v0.9.html",
+ "type": "OTU table",
+ "generated_by": "QIIME revision XYZ",
+ "date": "2011-12-19T19:00:00",
+ "rows":[
+ {"id":"GG_OTU_1", "metadata":null},
+ {"id":"GG_OTU_2", "metadata":null},
+ {"id":"GG_OTU_3", "metadata":null},
+ {"id":"GG_OTU_4", "metadata":null},
+ {"id":"GG_OTU_5", "metadata":null}
+ ],
+ "columns": [
+ {"id":"Sample1", "metadata":null},
+ {"id":"Sample2", "metadata":null},
+ {"id":"Sample3", "metadata":null},
+ {"id":"Sample4", "metadata":null},
+ {"id":"Sample5", "metadata":null},
+ {"id":"Sample6", "metadata":null}
+ ],
+ "matrix_type": "sparse",
+ "matrix_element_type": "int",
+ "shape": [5, 6],
+ "data":[[0,2,1],
+ [1,0,5],
+ [1,1,1],
+ [1,3,2],
+ [1,4,3],
+ [1,5,1],
+ [2,2,1],
+ [2,3,4],
+ [2,4,2],
+ [3,0,2],
+ [3,1,1],
+ [3,2,1],
+ [3,5,1],
+ [4,1,1],
+ [4,2,1]
+ ]
+ }
+"""
+
+classic_otu_table1_w_tax = """#Full OTU Counts
+#OTU ID PC.354 PC.355 PC.356 PC.481 PC.593 PC.607 PC.634 PC.635 PC.636\
+\tConsensus Lineage
+0 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+1 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+2 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Porphyromonadaceae;Parabacteroides
+3 2 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+4 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+5 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+6 0 0 0 0 0 0 0 1 0 Root;Bacteria;Actinobacteria;Actinobac\
+teria
+7 0 0 2 0 0 0 0 0 2 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+8 1 1 0 2 4 0 0 0 0 Root;Bacteria;Firmicutes;Bacilli;Lacto\
+bacillales;Lactobacillaceae;Lactobacillus
+9 0 0 2 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+10 0 1 0 0 0 0 0 0 0 Root;Bacteria
+11 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+12 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes
+13 1 0 0 1 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+14 0 0 1 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+15 0 0 0 0 1 0 0 0 0 Root;Bacteria
+16 1 0 2 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+17 0 0 0 1 0 0 4 10 37 Root;Bacteria;Bacteroidetes
+18 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+19 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes
+20 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+21 0 0 0 0 0 0 2 3 2 Root;Bacteria;Bacteroidetes
+22 0 0 0 0 2 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+23 14 1 14 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Bacilli;Lacto\
+bacillales;Lactobacillaceae;Lactobacillus
+24 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+25 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+26 0 0 0 0 0 0 0 1 1 Root;Bacteria;Bacteroidetes
+27 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes
+28 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+29 6 0 4 0 2 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+30 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes
+31 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+32 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+33 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+34 0 0 0 0 0 0 8 10 2 Root;Bacteria
+35 1 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+36 1 0 1 0 0 0 0 1 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+37 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+38 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+39 0 0 0 0 0 0 0 1 0 Root;Bacteria;Bacteroidetes;Bacteroid\
+etes;Bacteroidales;Rikenellaceae;Alistipes
+40 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+41 0 0 1 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+42 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes
+43 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+44 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+45 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Erysipelotric\
+hi;Erysipelotrichales;Erysipelotrichaceae;Coprobacillus
+46 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes
+47 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+48 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+49 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+50 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+51 0 1 0 0 0 0 0 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+52 0 2 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+53 0 0 0 0 0 0 2 0 1 Root;Bacteria;Proteobacteria;Deltaprot\
+eobacteria
+54 0 0 0 0 0 0 5 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Porphyromonadaceae;Parabacteroides
+55 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+56 0 0 0 0 0 1 0 0 0 Root;Bacteria;Bacteroidetes
+57 0 0 0 0 0 0 0 1 0 Root;Bacteria
+58 1 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+59 0 0 0 0 0 0 0 0 1 Root;Bacteria;Deferribacteres;Deferrib\
+acteres;Deferribacterales;Deferribacteraceae;Mucispirillum
+60 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+61 0 0 1 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+62 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+63 1 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+64 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+65 0 0 0 6 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+66 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+67 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+68 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+69 0 0 1 0 0 0 0 0 0 Root;Bacteria
+70 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+71 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+72 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+73 0 0 0 0 0 5 0 0 0 Root;Bacteria;Bacteroidetes
+74 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+75 1 0 1 0 0 0 0 0 0 Root;Bacteria;Bacteroidetes
+76 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+77 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+78 1 0 1 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+79 2 3 8 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+80 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Porphyromonadaceae;Parabacteroides
+81 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+82 0 0 0 0 0 2 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+83 0 0 0 1 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+84 1 0 0 0 0 0 0 2 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae;Ruminococcus
+85 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+86 0 0 0 0 0 0 0 1 0 Root;Bacteria
+87 0 0 1 0 0 2 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+88 0 0 0 0 0 0 0 1 0 Root;Bacteria
+89 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+90 0 0 0 9 0 0 3 0 0 Root;Bacteria;Firmicutes;Erysipelotric\
+hi;Erysipelotrichales;Erysipelotrichaceae;Turicibacter
+91 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Butyrivibrio
+92 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+93 0 0 0 0 0 0 2 1 0 Root;Bacteria;Bacteroidetes
+94 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+95 0 0 0 2 0 0 0 0 0 Root;Bacteria;Bacteroidetes
+96 0 0 0 1 0 1 0 1 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+97 0 0 0 0 0 1 0 0 0 Root;Bacteria
+98 0 0 0 0 0 0 0 1 0 Root;Bacteria
+99 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+100 0 0 0 1 0 0 0 0 0 Root;Bacteria
+101 0 0 0 3 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+102 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+103 0 1 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+104 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+105 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+106 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+107 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+108 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Incertae Sedis XIII;Anaerovorax
+109 0 0 0 1 0 0 1 5 2 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+110 0 0 0 0 0 2 0 0 0 Root;Bacteria;Actinobacteria;Actinobac\
+teria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae;Olse\
+nella
+111 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+112 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+113 0 0 0 0 0 1 0 0 0 Root;Bacteria
+114 0 0 0 0 0 1 0 0 0 Root;Bacteria
+115 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes
+116 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+117 1 0 2 0 0 6 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+118 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+119 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+120 1 3 1 2 1 9 2 4 5 Root;Bacteria;Bacteroidetes
+121 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+122 0 0 0 1 0 2 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+123 0 0 0 0 0 0 1 0 0 Root;Bacteria;Actinobacteria;Actinobac\
+teria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae
+124 0 0 0 0 0 0 1 0 0 Root;Bacteria;Actinobacteria;Actinobac\
+teria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae
+125 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes
+126 0 0 2 0 0 0 0 1 0 Root;Bacteria
+127 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+128 0 0 0 0 0 0 1 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+129 0 0 0 1 0 0 0 0 0 Root;Bacteria
+130 0 0 0 0 5 2 0 0 0 Root;Bacteria;Proteobacteria;Epsilonpr\
+oteobacteria;Campylobacterales;Helicobacteraceae;Helicobacter
+131 0 0 1 3 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+132 0 0 0 0 1 0 0 0 0 Root;Bacteria
+133 0 0 1 0 0 0 0 0 0 Root;Bacteria
+134 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+135 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+136 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+137 0 0 0 0 0 0 0 1 0 Root;Bacteria
+138 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+139 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+140 0 0 0 0 0 0 1 3 0 Root;Bacteria
+141 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+142 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+143 0 0 1 0 0 0 0 0 0 Root;Bacteria
+144 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+145 0 0 2 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+146 1 0 0 0 2 0 2 0 3 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+147 0 1 0 1 1 0 0 0 3 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+148 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes
+149 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+150 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+151 0 0 0 1 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+152 0 0 0 1 0 0 1 2 19 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+153 0 2 1 2 0 0 1 1 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+154 2 18 0 1 0 0 21 4 4 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+155 0 0 0 0 0 5 9 5 3 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+156 0 0 1 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+157 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+158 1 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+159 0 0 0 0 0 0 0 1 1 Root;Bacteria;Bacteroidetes
+160 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+161 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+162 0 0 0 0 0 3 5 2 6 Root;Bacteria;Deferribacteres;Deferrib\
+acteres;Deferribacterales;Deferribacteraceae;Mucispirillum
+163 0 0 0 0 0 0 0 0 1 Root;Bacteria
+164 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+165 2 1 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+166 0 0 0 0 0 0 0 1 0 Root;Bacteria
+167 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+168 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+169 0 2 0 7 0 0 0 2 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+170 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+171 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+172 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+173 0 0 0 0 0 1 0 0 0 Root;Bacteria
+174 1 0 0 0 10 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Peptostreptococcaceae;Peptostreptococcaceae Incertae Sedis
+175 0 0 0 0 1 0 0 0 0 Root;Bacteria;Bacteroidetes
+176 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+177 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia
+178 0 0 0 2 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+179 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+180 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+181 1 4 2 6 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+182 0 0 0 0 0 1 0 0 0 Root;Bacteria
+183 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia
+184 0 0 0 1 0 0 3 1 0 Root;Bacteria;Bacteroidetes
+185 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+186 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+187 0 1 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+188 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+189 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+190 0 0 0 0 0 0 0 1 0 Root;Bacteria
+191 2 1 10 2 24 0 0 1 1 Root;Bacteria
+192 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Bacilli;Lacto\
+bacillales;Streptococcaceae;Streptococcus
+193 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Butyrivibrio
+194 0 0 2 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae;Acetanaerobacterium
+195 0 0 0 0 0 1 0 0 0 Root;Bacteria
+196 0 0 0 0 0 1 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+197 0 1 0 0 0 0 0 0 0 Root;Bacteria
+198 0 2 0 0 0 1 0 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales
+199 0 0 0 0 0 1 1 0 0 Root;Bacteria
+200 0 0 0 2 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+201 0 0 0 1 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+202 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+203 0 2 2 4 0 5 1 5 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+204 1 4 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+205 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+206 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+207 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+208 0 2 0 2 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+209 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+210 0 0 0 0 0 0 0 0 1 Root;Bacteria
+211 1 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+212 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+213 0 0 0 0 0 0 0 2 0 Root;Bacteria;Firmicutes
+214 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+215 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+216 0 0 0 0 0 0 0 1 0 Root;Bacteria;Bacteroidetes
+217 0 0 0 0 0 2 0 1 0 Root;Bacteria
+218 0 0 0 0 9 1 0 0 0 Root;Bacteria;Bacteroidetes
+219 0 0 0 0 1 0 0 0 0 Root;Bacteria
+220 1 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+221 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes
+222 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+223 0 0 0 0 0 0 0 2 2 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+224 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+225 0 2 1 0 0 0 0 0 0 Root;Bacteria;Bacteroidetes
+226 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+227 0 1 2 0 9 1 1 1 3 Root;Bacteria;Bacteroidetes
+228 16 0 0 0 12 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+229 0 0 0 0 0 1 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Incertae Sedis XIII
+230 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+231 0 19 2 0 2 0 3 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+232 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+233 0 0 0 0 1 0 0 0 0 Root;Bacteria;Bacteroidetes
+234 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Bacilli;Lacto\
+bacillales;Lactobacillaceae;Lactobacillus
+235 0 1 1 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+236 0 0 0 0 0 2 0 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales
+237 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+238 0 0 0 0 0 0 0 1 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+239 0 0 0 0 0 1 0 0 0 Root;Bacteria
+240 0 0 0 0 0 1 0 0 0 Root;Bacteria
+241 0 0 0 0 0 0 2 0 0 Root;Bacteria;TM7;TM7_genera_incertae_\
+sedis
+242 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+243 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+244 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes
+245 0 0 0 1 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+246 0 0 0 0 0 0 0 1 0 Root;Bacteria
+247 0 0 1 0 0 0 0 0 0 Root;Bacteria;Bacteroidetes
+248 1 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Bacilli;Lacto\
+bacillales;Lactobacillaceae;Lactobacillus
+249 1 0 0 0 0 0 0 0 0 Root;Bacteria
+250 1 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+251 0 0 0 1 4 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+252 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+253 0 0 0 0 2 0 0 5 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+254 11 13 6 13 2 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+255 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+256 0 0 0 0 0 0 1 0 0 Root;Bacteria
+257 0 0 0 0 0 0 5 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+258 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+259 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+260 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+261 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+262 0 1 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Bryantella
+263 0 0 0 0 1 0 0 0 0 Root;Bacteria
+264 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+265 0 0 0 0 0 2 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+266 0 0 0 2 0 0 0 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae;Alistipes
+267 1 0 0 5 17 20 0 0 0 Root;Bacteria
+268 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+269 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+270 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+271 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+272 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+273 0 0 0 0 0 0 1 0 0 Root;Bacteria
+274 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+275 0 0 0 0 0 0 1 0 0 Root;Bacteria;Verrucomicrobia;Verrucom\
+icrobiae;Verrucomicrobiales;Verrucomicrobiaceae;Akkermansia
+276 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+277 1 0 0 0 0 0 0 0 0 Root;Bacteria
+278 0 0 0 0 0 1 0 0 0 Root;Bacteria
+279 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+280 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+281 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+282 0 0 0 0 0 0 2 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Porphyromonadaceae;Parabacteroides
+283 0 0 0 0 0 0 2 1 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+284 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+285 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+286 0 2 3 1 4 0 5 0 4 Root;Bacteria;Bacteroidetes
+287 0 0 0 0 0 0 1 1 1 Root;Bacteria;Bacteroidetes
+288 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+289 0 0 0 0 3 0 0 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+290 0 0 0 0 0 0 0 0 2 Root;Bacteria;Firmicutes;Bacilli;Bacil\
+lales;Staphylococcaceae;Staphylococcus
+291 0 0 0 0 1 0 0 0 0 Root;Bacteria
+292 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+293 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+294 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+295 29 1 10 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+296 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+297 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+298 0 0 0 0 0 0 1 0 0 Root;Bacteria;Actinobacteria;Actinobac\
+teria
+299 0 0 0 0 0 0 1 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+300 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia
+301 0 0 0 0 0 0 2 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+302 0 0 0 0 0 1 0 0 0 Root;Bacteria
+303 0 0 0 0 0 0 0 0 1 Root;Bacteria
+304 0 0 0 0 0 0 0 1 0 Root;Bacteria;Bacteroidetes
+305 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+306 0 0 0 0 0 0 0 0 1 Root;Bacteria
+307 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+308 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae;Ruminococcaceae Incertae Sedis
+309 0 0 0 1 0 0 0 0 0 Root;Bacteria;Actinobacteria;Actinobac\
+teria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae;Deni\
+trobacterium
+310 0 0 1 0 0 0 0 0 0 Root;Bacteria
+311 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+312 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+313 0 1 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Porphyromonadaceae;Parabacteroides
+314 0 0 1 0 0 0 0 0 0 Root;Bacteria;Bacteroidetes
+315 1 3 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+316 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+317 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+318 0 0 0 0 0 1 0 0 0 Root;Bacteria;Proteobacteria
+319 0 2 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+320 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+321 0 0 0 0 0 0 0 0 1 Root;Bacteria
+322 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+323 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+324 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+325 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+326 0 0 0 0 4 0 0 0 2 Root;Bacteria;Firmicutes;Erysipelotric\
+hi;Erysipelotrichales;Erysipelotrichaceae;Erysipelotrichaceae Incertae Sedis
+327 0 0 0 0 0 0 0 1 0 Root;Bacteria;Bacteroidetes
+328 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+329 2 2 0 1 0 0 0 0 0 Root;Bacteria;Bacteroidetes
+330 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes
+331 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes
+332 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+333 0 0 0 0 0 6 0 3 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+334 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+335 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+336 0 0 1 0 0 0 0 0 0 Root;Bacteria
+337 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+338 0 0 0 0 0 0 0 1 0 Root;Bacteria
+339 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+340 0 0 2 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+341 0 0 1 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+342 0 0 0 0 0 1 0 0 0 Root;Bacteria
+343 0 0 0 0 0 0 0 0 1 Root;Bacteria;Actinobacteria;Actinobac\
+teria;Coriobacteridae;Coriobacteriales;Coriobacterineae;Coriobacteriaceae
+344 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+345 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+346 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+347 0 0 0 1 0 0 0 0 0 Root;Bacteria
+348 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+349 0 0 0 0 0 0 1 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+350 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+351 0 0 0 0 2 2 1 4 1 Root;Bacteria;Bacteroidetes
+352 3 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+353 0 4 4 0 1 2 0 2 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+354 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+355 0 0 0 0 0 0 0 1 0 Root;Bacteria
+356 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+357 0 0 0 4 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+358 0 0 1 0 0 0 0 0 0 Root;Bacteria
+359 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+360 0 0 1 0 0 0 0 1 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+361 2 0 2 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+362 1 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+363 0 0 0 0 0 1 0 1 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Rikenellaceae
+364 1 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+365 0 0 0 0 0 2 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+366 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Roseburia
+367 0 0 0 0 1 0 0 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+368 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+369 0 0 0 0 0 1 0 0 0 Root;Bacteria
+370 2 1 0 5 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+371 1 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+372 0 1 0 0 0 0 0 0 0 Root;Bacteria
+373 0 1 0 0 0 0 3 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Clostridiaceae;Clostridiaceae 1;Clostridium
+374 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+375 0 0 0 0 0 0 4 0 0 Root;Bacteria;Firmicutes;Erysipelotric\
+hi;Erysipelotrichales;Erysipelotrichaceae;Erysipelotrichaceae Incertae Sedis
+376 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+377 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+378 0 0 0 0 0 0 0 0 1 Root;Bacteria;Bacteroidetes
+379 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Ruminococcaceae
+380 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Bacilli;Bacil\
+lales;Staphylococcaceae;Staphylococcus
+381 0 0 2 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+382 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+383 4 9 0 2 0 0 0 2 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+384 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+385 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Bacilli;Lacto\
+bacillales;Carnobacteriaceae;Carnobacteriaceae 1
+386 0 0 1 0 0 0 0 0 0 Root;Bacteria
+387 0 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+388 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+389 0 1 0 0 0 0 0 0 0 Root;Bacteria
+390 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+391 0 0 0 0 0 0 0 0 1 Root;Bacteria;Firmicutes
+392 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+393 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+394 0 0 1 0 0 0 0 0 0 Root;Bacteria
+395 1 1 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+396 2 0 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+397 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+398 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+399 0 0 0 0 0 0 13 0 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Bacteroidaceae;Bacteroides
+400 0 0 0 0 0 0 1 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+401 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+402 0 1 0 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+403 0 0 0 0 0 0 0 1 0 Root;Bacteria;Bacteroidetes;Bacteroide\
+tes;Bacteroidales;Prevotellaceae
+404 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae;Lachnospiraceae Incertae Sedis
+405 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+406 0 0 0 0 0 1 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+407 1 0 0 0 0 4 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+408 1 5 3 2 0 0 0 0 1 Root;Bacteria;Bacteroidetes
+409 0 0 0 0 0 0 0 1 1 Root;Bacteria;Bacteroidetes
+410 0 0 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+411 0 0 0 1 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+412 0 0 0 0 2 0 0 0 0 Root;Bacteria;Bacteroidetes
+413 0 0 0 0 0 0 0 1 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales
+414 1 0 1 0 0 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales;Lachnospiraceae
+415 0 0 0 0 0 7 0 2 2 Root;Bacteria;Bacteroidetes
+416 0 1 0 0 1 0 0 0 0 Root;Bacteria;Firmicutes;Clostridia;Cl\
+ostridiales"""
+
+classic_otu_table1_no_tax = """#Full OTU Counts
+#OTU ID PC.354 PC.355 PC.356 PC.481 PC.593 PC.607 PC.634 PC.635 PC.636
+0 0 0 0 0 0 0 0 1 0
+1 0 0 0 0 0 1 0 0 0
+2 0 0 0 0 0 0 0 0 1
+3 2 1 0 0 0 0 0 0 0
+4 1 0 0 0 0 0 0 0 0
+5 0 0 0 0 0 0 0 0 1
+6 0 0 0 0 0 0 0 1 0
+7 0 0 2 0 0 0 0 0 2
+8 1 1 0 2 4 0 0 0 0
+9 0 0 2 0 0 0 0 0 0
+10 0 1 0 0 0 0 0 0 0
+11 0 0 0 0 0 0 1 0 0
+12 0 0 0 0 0 0 1 0 0
+13 1 0 0 1 0 1 0 0 0
+14 0 0 1 1 0 0 0 0 0
+15 0 0 0 0 1 0 0 0 0
+16 1 0 2 0 0 0 0 0 0
+17 0 0 0 1 0 0 4 10 37
+18 0 1 0 0 0 0 0 0 0
+19 0 0 0 0 0 0 0 0 1
+20 0 0 0 0 1 0 0 0 0
+21 0 0 0 0 0 0 2 3 2
+22 0 0 0 0 2 0 1 0 0
+23 14 1 14 1 0 0 0 0 0
+24 1 0 0 0 0 0 0 0 0
+25 0 0 0 1 0 0 0 0 0
+26 0 0 0 0 0 0 0 1 1
+27 0 0 0 0 0 0 0 0 1
+28 0 1 0 0 0 0 0 0 0
+29 6 0 4 0 2 0 0 0 0
+30 0 0 0 0 0 1 0 0 0
+31 1 0 0 0 0 0 0 0 0
+32 0 0 0 0 1 0 0 0 0
+33 0 0 0 1 0 0 0 0 0
+34 0 0 0 0 0 0 8 10 2
+35 1 0 1 0 0 0 0 0 0
+36 1 0 1 0 0 0 0 1 1
+37 0 0 0 0 0 1 0 0 0
+38 0 0 1 0 0 0 0 0 0
+39 0 0 0 0 0 0 0 1 0
+40 0 0 1 0 0 0 0 0 0
+41 0 0 1 0 0 0 0 1 0
+42 0 0 0 0 0 1 0 0 0
+43 0 0 0 0 0 1 0 0 0
+44 0 0 1 0 0 0 0 0 0
+45 1 0 0 0 0 0 0 0 0
+46 0 0 0 0 0 0 0 0 1
+47 0 0 0 1 0 0 0 0 0
+48 0 0 0 0 1 0 0 0 0
+49 0 0 0 1 0 0 0 0 0
+50 0 1 0 0 0 0 0 0 0
+51 0 1 0 0 0 0 0 0 0
+52 0 2 0 0 0 0 0 0 0
+53 0 0 0 0 0 0 2 0 1
+54 0 0 0 0 0 0 5 0 0
+55 0 0 0 0 0 0 1 0 0
+56 0 0 0 0 0 1 0 0 0
+57 0 0 0 0 0 0 0 1 0
+58 1 0 1 0 0 0 0 0 0
+59 0 0 0 0 0 0 0 0 1
+60 0 0 0 0 0 0 0 1 0
+61 0 0 1 0 0 0 0 1 0
+62 0 0 1 0 0 0 0 0 0
+63 1 0 1 0 0 0 0 0 0
+64 0 0 0 0 0 0 0 0 1
+65 0 0 0 6 0 0 0 1 0
+66 0 0 1 0 0 0 0 0 0
+67 0 0 1 0 0 0 0 0 0
+68 1 0 0 0 0 0 0 0 0
+69 0 0 1 0 0 0 0 0 0
+70 0 0 0 0 0 1 0 0 0
+71 0 0 1 0 0 0 0 0 0
+72 0 0 0 0 0 1 0 0 0
+73 0 0 0 0 0 5 0 0 0
+74 0 0 0 1 0 0 0 0 0
+75 1 0 1 0 0 0 0 0 0
+76 0 0 0 1 0 0 0 0 0
+77 0 0 0 1 0 0 0 0 0
+78 1 0 1 1 0 0 0 0 0
+79 2 3 8 0 1 0 0 0 0
+80 0 0 0 0 0 0 0 0 1
+81 1 0 0 0 0 0 0 0 0
+82 0 0 0 0 0 2 0 0 0
+83 0 0 0 1 0 0 0 1 0
+84 1 0 0 0 0 0 0 2 0
+85 0 0 0 0 0 0 0 0 1
+86 0 0 0 0 0 0 0 1 0
+87 0 0 1 0 0 2 0 1 0
+88 0 0 0 0 0 0 0 1 0
+89 0 0 1 0 0 0 0 0 0
+90 0 0 0 9 0 0 3 0 0
+91 0 0 0 1 0 0 0 0 0
+92 0 0 0 0 0 0 1 0 0
+93 0 0 0 0 0 0 2 1 0
+94 0 0 0 0 0 0 0 1 0
+95 0 0 0 2 0 0 0 0 0
+96 0 0 0 1 0 1 0 1 1
+97 0 0 0 0 0 1 0 0 0
+98 0 0 0 0 0 0 0 1 0
+99 0 0 0 1 0 0 0 0 0
+100 0 0 0 1 0 0 0 0 0
+101 0 0 0 3 0 0 0 0 0
+102 0 1 0 0 0 0 0 0 0
+103 0 1 0 0 0 0 1 0 0
+104 0 0 0 0 0 1 0 0 0
+105 0 1 0 0 0 0 0 0 0
+106 0 0 0 0 0 1 0 0 0
+107 0 0 0 0 0 1 0 0 0
+108 0 0 0 0 0 0 1 0 0
+109 0 0 0 1 0 0 1 5 2
+110 0 0 0 0 0 2 0 0 0
+111 0 0 0 0 0 0 1 0 0
+112 0 0 0 0 0 0 1 0 0
+113 0 0 0 0 0 1 0 0 0
+114 0 0 0 0 0 1 0 0 0
+115 0 0 0 0 0 1 0 0 0
+116 0 1 0 0 0 0 0 0 0
+117 1 0 2 0 0 6 0 0 0
+118 0 0 0 1 0 0 0 0 0
+119 0 0 0 0 0 0 0 1 0
+120 1 3 1 2 1 9 2 4 5
+121 0 0 0 0 0 0 0 1 0
+122 0 0 0 1 0 2 0 0 0
+123 0 0 0 0 0 0 1 0 0
+124 0 0 0 0 0 0 1 0 0
+125 0 0 0 0 0 0 1 0 0
+126 0 0 2 0 0 0 0 1 0
+127 0 0 0 0 0 1 0 0 0
+128 0 0 0 0 0 0 1 0 0
+129 0 0 0 1 0 0 0 0 0
+130 0 0 0 0 5 2 0 0 0
+131 0 0 1 3 0 0 0 0 0
+132 0 0 0 0 1 0 0 0 0
+133 0 0 1 0 0 0 0 0 0
+134 0 0 0 0 0 0 0 0 1
+135 0 0 1 0 0 0 0 0 0
+136 1 0 0 0 0 0 0 0 0
+137 0 0 0 0 0 0 0 1 0
+138 0 0 1 0 0 0 0 0 0
+139 1 0 0 0 0 0 0 0 0
+140 0 0 0 0 0 0 1 3 0
+141 0 0 0 0 1 0 0 0 0
+142 0 0 0 0 1 0 0 0 0
+143 0 0 1 0 0 0 0 0 0
+144 0 0 0 0 0 1 0 0 0
+145 0 0 2 0 0 0 0 0 0
+146 1 0 0 0 2 0 2 0 3
+147 0 1 0 1 1 0 0 0 3
+148 0 0 0 0 0 1 0 0 0
+149 0 0 0 0 0 0 0 1 0
+150 0 0 0 0 1 0 0 0 0
+151 0 0 0 1 0 0 0 1 0
+152 0 0 0 1 0 0 1 2 19
+153 0 2 1 2 0 0 1 1 1
+154 2 18 0 1 0 0 21 4 4
+155 0 0 0 0 0 5 9 5 3
+156 0 0 1 0 0 0 0 1 0
+157 0 0 1 0 0 0 0 0 0
+158 1 0 1 0 0 0 0 0 0
+159 0 0 0 0 0 0 0 1 1
+160 0 0 0 0 0 0 1 0 0
+161 0 0 1 0 0 0 0 0 0
+162 0 0 0 0 0 3 5 2 6
+163 0 0 0 0 0 0 0 0 1
+164 0 0 0 0 0 1 0 0 0
+165 2 1 1 0 0 0 0 0 0
+166 0 0 0 0 0 0 0 1 0
+167 1 0 0 0 0 0 0 0 0
+168 0 0 0 1 0 0 0 0 0
+169 0 2 0 7 0 0 0 2 0
+170 0 0 0 1 0 0 0 0 0
+171 0 0 0 1 0 0 0 0 0
+172 1 0 0 0 0 0 0 0 0
+173 0 0 0 0 0 1 0 0 0
+174 1 0 0 0 10 0 0 0 0
+175 0 0 0 0 1 0 0 0 0
+176 0 0 0 0 0 1 0 0 0
+177 0 0 0 1 0 0 0 0 0
+178 0 0 0 2 0 0 0 0 0
+179 0 0 0 1 0 0 0 0 0
+180 0 0 0 0 1 0 0 0 0
+181 1 4 2 6 0 0 0 0 0
+182 0 0 0 0 0 1 0 0 0
+183 0 0 0 0 0 0 1 0 0
+184 0 0 0 1 0 0 3 1 0
+185 0 0 0 0 0 0 0 0 1
+186 0 0 1 0 0 0 0 0 0
+187 0 1 0 0 0 0 0 0 1
+188 0 0 0 0 0 0 0 1 0
+189 0 0 0 1 0 0 0 0 0
+190 0 0 0 0 0 0 0 1 0
+191 2 1 10 2 24 0 0 1 1
+192 0 0 0 0 0 1 0 0 0
+193 0 0 0 0 0 1 0 0 0
+194 0 0 2 0 0 0 0 0 0
+195 0 0 0 0 0 1 0 0 0
+196 0 0 0 0 0 1 0 1 0
+197 0 1 0 0 0 0 0 0 0
+198 0 2 0 0 0 1 0 0 0
+199 0 0 0 0 0 1 1 0 0
+200 0 0 0 2 0 0 0 0 0
+201 0 0 0 1 0 1 0 0 0
+202 0 0 0 0 0 0 1 0 0
+203 0 2 2 4 0 5 1 5 0
+204 1 4 0 1 0 0 0 0 0
+205 0 0 0 0 0 0 0 1 0
+206 0 1 0 0 0 0 0 0 0
+207 0 0 0 0 0 0 0 1 0
+208 0 2 0 2 0 0 0 1 0
+209 0 0 1 0 0 0 0 0 0
+210 0 0 0 0 0 0 0 0 1
+211 1 0 0 1 0 0 0 0 0
+212 0 0 0 0 0 0 0 0 1
+213 0 0 0 0 0 0 0 2 0
+214 0 0 0 0 0 0 0 1 0
+215 0 0 0 0 0 0 0 1 0
+216 0 0 0 0 0 0 0 1 0
+217 0 0 0 0 0 2 0 1 0
+218 0 0 0 0 9 1 0 0 0
+219 0 0 0 0 1 0 0 0 0
+220 1 0 0 0 1 0 0 0 0
+221 0 0 0 0 0 0 0 1 0
+222 0 1 0 0 0 0 0 0 0
+223 0 0 0 0 0 0 0 2 2
+224 0 0 0 1 0 0 0 0 0
+225 0 2 1 0 0 0 0 0 0
+226 0 0 0 0 0 1 0 0 0
+227 0 1 2 0 9 1 1 1 3
+228 16 0 0 0 12 0 0 0 0
+229 0 0 0 0 0 1 1 0 0
+230 0 0 0 1 0 0 0 0 0
+231 0 19 2 0 2 0 3 0 0
+232 0 0 0 0 0 0 1 0 0
+233 0 0 0 0 1 0 0 0 0
+234 0 0 0 0 1 0 0 0 0
+235 0 1 1 0 1 0 0 0 0
+236 0 0 0 0 0 2 0 0 0
+237 0 0 0 0 1 0 0 0 0
+238 0 0 0 0 0 0 0 1 0
+239 0 0 0 0 0 1 0 0 0
+240 0 0 0 0 0 1 0 0 0
+241 0 0 0 0 0 0 2 0 0
+242 0 0 0 0 0 0 1 0 0
+243 0 0 0 0 0 0 1 0 0
+244 0 0 0 0 0 0 0 0 1
+245 0 0 0 1 0 0 0 1 0
+246 0 0 0 0 0 0 0 1 0
+247 0 0 1 0 0 0 0 0 0
+248 1 0 0 1 0 0 0 0 0
+249 1 0 0 0 0 0 0 0 0
+250 1 0 0 0 0 0 0 1 0
+251 0 0 0 1 4 0 0 0 0
+252 0 0 0 1 0 0 0 0 0
+253 0 0 0 0 2 0 0 5 0
+254 11 13 6 13 2 0 0 0 0
+255 0 0 0 0 0 1 0 0 0
+256 0 0 0 0 0 0 1 0 0
+257 0 0 0 0 0 0 5 0 0
+258 0 0 1 0 0 0 0 0 0
+259 0 0 0 0 0 0 0 1 0
+260 0 0 0 0 0 0 0 1 0
+261 0 0 0 0 0 0 0 1 0
+262 0 1 0 0 0 0 0 0 1
+263 0 0 0 0 1 0 0 0 0
+264 0 0 0 0 0 1 0 0 0
+265 0 0 0 0 0 2 0 0 0
+266 0 0 0 2 0 0 0 0 0
+267 1 0 0 5 17 20 0 0 0
+268 0 0 0 0 0 0 1 0 0
+269 0 0 0 1 0 0 0 0 0
+270 0 0 1 0 0 0 0 0 0
+271 0 0 0 0 0 0 0 0 1
+272 0 0 0 1 0 0 0 0 0
+273 0 0 0 0 0 0 1 0 0
+274 0 0 0 0 0 0 1 0 0
+275 0 0 0 0 0 0 1 0 0
+276 0 0 0 0 0 0 0 1 0
+277 1 0 0 0 0 0 0 0 0
+278 0 0 0 0 0 1 0 0 0
+279 0 0 0 0 0 1 0 0 0
+280 0 1 0 0 0 0 0 0 0
+281 1 0 0 0 0 0 0 0 0
+282 0 0 0 0 0 0 2 0 0
+283 0 0 0 0 0 0 2 1 0
+284 0 0 0 1 0 0 0 0 0
+285 0 0 0 0 0 0 1 0 0
+286 0 2 3 1 4 0 5 0 4
+287 0 0 0 0 0 0 1 1 1
+288 0 0 0 0 0 1 0 0 0
+289 0 0 0 0 3 0 0 0 0
+290 0 0 0 0 0 0 0 0 2
+291 0 0 0 0 1 0 0 0 0
+292 0 0 0 0 1 0 0 0 0
+293 0 0 0 0 0 1 0 0 0
+294 0 1 0 0 0 0 0 0 0
+295 29 1 10 0 0 0 0 0 0
+296 0 0 0 0 1 0 0 0 0
+297 0 0 0 1 0 0 0 0 0
+298 0 0 0 0 0 0 1 0 0
+299 0 0 0 0 0 0 1 0 1
+300 0 0 0 0 0 1 0 0 0
+301 0 0 0 0 0 0 2 0 0
+302 0 0 0 0 0 1 0 0 0
+303 0 0 0 0 0 0 0 0 1
+304 0 0 0 0 0 0 0 1 0
+305 1 0 0 0 0 0 0 0 0
+306 0 0 0 0 0 0 0 0 1
+307 0 0 1 0 0 0 0 0 0
+308 0 1 0 0 0 0 0 0 0
+309 0 0 0 1 0 0 0 0 0
+310 0 0 1 0 0 0 0 0 0
+311 0 0 0 0 0 1 0 0 0
+312 0 0 1 0 0 0 0 0 0
+313 0 1 0 0 0 0 0 0 1
+314 0 0 1 0 0 0 0 0 0
+315 1 3 1 0 0 0 0 0 0
+316 0 1 0 0 0 0 0 0 0
+317 0 0 0 0 0 0 1 0 0
+318 0 0 0 0 0 1 0 0 0
+319 0 2 1 0 0 0 0 0 0
+320 0 0 0 1 0 0 0 0 0
+321 0 0 0 0 0 0 0 0 1
+322 0 0 0 1 0 0 0 0 0
+323 0 0 1 0 0 0 0 0 0
+324 0 0 1 0 0 0 0 0 0
+325 0 1 0 0 0 0 0 0 0
+326 0 0 0 0 4 0 0 0 2
+327 0 0 0 0 0 0 0 1 0
+328 0 0 0 1 0 0 0 0 0
+329 2 2 0 1 0 0 0 0 0
+330 0 0 1 0 0 0 0 0 0
+331 0 0 0 0 1 0 0 0 0
+332 0 1 0 0 0 0 0 0 0
+333 0 0 0 0 0 6 0 3 0
+334 1 0 0 0 0 0 0 0 0
+335 0 0 0 0 0 0 0 1 0
+336 0 0 1 0 0 0 0 0 0
+337 0 0 0 1 0 0 0 0 0
+338 0 0 0 0 0 0 0 1 0
+339 0 0 1 0 0 0 0 0 0
+340 0 0 2 0 0 0 0 0 0
+341 0 0 1 0 0 0 0 1 0
+342 0 0 0 0 0 1 0 0 0
+343 0 0 0 0 0 0 0 0 1
+344 0 0 1 0 0 0 0 0 0
+345 1 0 0 0 0 0 0 0 0
+346 0 1 0 0 0 0 0 0 0
+347 0 0 0 1 0 0 0 0 0
+348 0 0 0 1 0 0 0 0 0
+349 0 0 0 0 0 0 1 0 1
+350 1 0 0 0 0 0 0 0 0
+351 0 0 0 0 2 2 1 4 1
+352 3 0 0 0 0 0 0 0 0
+353 0 4 4 0 1 2 0 2 1
+354 0 0 0 0 0 1 0 0 0
+355 0 0 0 0 0 0 0 1 0
+356 0 0 0 0 0 1 0 0 0
+357 0 0 0 4 0 0 0 0 0
+358 0 0 1 0 0 0 0 0 0
+359 0 0 1 0 0 0 0 0 0
+360 0 0 1 0 0 0 0 1 1
+361 2 0 2 1 0 0 0 0 0
+362 1 0 0 1 0 0 0 0 0
+363 0 0 0 0 0 1 0 1 0
+364 1 0 0 0 0 0 0 0 0
+365 0 0 0 0 0 2 0 0 0
+366 0 0 0 1 0 0 0 0 0
+367 0 0 0 0 1 0 0 0 0
+368 0 0 0 0 0 1 0 0 0
+369 0 0 0 0 0 1 0 0 0
+370 2 1 0 5 0 1 0 0 0
+371 1 1 0 0 0 0 0 0 0
+372 0 1 0 0 0 0 0 0 0
+373 0 1 0 0 0 0 3 0 0
+374 0 0 0 0 0 0 1 0 0
+375 0 0 0 0 0 0 4 0 0
+376 0 0 0 0 0 0 0 0 1
+377 0 0 0 0 0 0 0 1 0
+378 0 0 0 0 0 0 0 0 1
+379 0 0 0 0 0 1 0 0 0
+380 0 0 0 0 0 0 0 0 1
+381 0 0 2 0 0 0 0 0 0
+382 0 0 0 0 0 0 0 1 0
+383 4 9 0 2 0 0 0 2 0
+384 0 0 1 0 0 0 0 0 0
+385 0 0 0 0 0 0 0 0 1
+386 0 0 1 0 0 0 0 0 0
+387 0 0 1 0 0 0 0 0 0
+388 0 0 0 1 0 0 0 0 0
+389 0 1 0 0 0 0 0 0 0
+390 0 0 0 0 0 0 0 0 1
+391 0 0 0 0 0 0 0 0 1
+392 0 1 0 0 0 0 0 0 0
+393 0 0 0 0 0 1 0 0 0
+394 0 0 1 0 0 0 0 0 0
+395 1 1 1 0 0 0 0 0 0
+396 2 0 0 0 0 0 0 0 0
+397 0 0 0 0 0 0 0 1 0
+398 0 0 0 0 0 0 0 1 0
+399 0 0 0 0 0 0 13 0 0
+400 0 0 0 0 0 0 1 0 0
+401 0 1 0 0 0 0 0 0 0
+402 0 1 0 0 0 0 0 0 0
+403 0 0 0 0 0 0 0 1 0
+404 0 0 0 0 0 0 0 1 0
+405 0 0 0 0 0 0 0 1 0
+406 0 0 0 0 0 1 0 0 0
+407 1 0 0 0 0 4 0 0 0
+408 1 5 3 2 0 0 0 0 1
+409 0 0 0 0 0 0 0 1 1
+410 0 0 0 0 1 0 0 0 0
+411 0 0 0 1 0 0 0 0 0
+412 0 0 0 0 2 0 0 0 0
+413 0 0 0 0 0 0 0 1 0
+414 1 0 1 0 0 0 0 0 0
+415 0 0 0 0 0 7 0 2 2
+416 0 1 0 0 1 0 0 0 0"""
+
+
+class ParseUcTests(TestCase):
+
+ def test_empty(self):
+ """ empty uc file returns empty Table
+ """
+ actual = parse_uc(uc_empty.split('\n'))
+ expected = Table(np.array([[]]),
+ observation_ids=[],
+ sample_ids=[])
+ self.assertEqual(actual, expected)
+
+ def test_minimal(self):
+ """ single new seed observed
+ """
+ actual = parse_uc(uc_minimal.split('\n'))
+ expected = Table(np.array([[1.0]]),
+ observation_ids=['f2_1539'],
+ sample_ids=['f2'])
+ self.assertEqual(actual, expected)
+
+ def test_lib_minimal(self):
+ """ single library seed observed
+ """
+ actual = parse_uc(uc_lib_minimal.split('\n'))
+ expected = Table(np.array([[1.0]]),
+ observation_ids=['295053'],
+ sample_ids=['f2'])
+ self.assertEqual(actual, expected)
+
+ def test_invalid(self):
+ """ invalid query sequence identifier detected
+ """
+ self.assertRaises(ValueError, parse_uc, uc_invalid_id.split('\n'))
+
+ def test_seed_hits(self):
+ """ multiple new seeds observed
+ """
+ actual = parse_uc(uc_seed_hits.split('\n'))
+ expected = Table(np.array([[2.0, 1.0], [0.0, 1.0]]),
+ observation_ids=['f2_1539', 'f3_44'],
+ sample_ids=['f2', 'f3'])
+ self.assertEqual(actual, expected)
+
+ def test_mixed_hits(self):
+ """ new and library seeds observed
+ """
+ actual = parse_uc(uc_mixed_hits.split('\n'))
+ expected = Table(np.array([[2.0, 1.0], [0.0, 1.0], [1.0, 0.0]]),
+ observation_ids=['f2_1539', 'f3_44', '295053'],
+ sample_ids=['f2', 'f3'])
+ self.assertEqual(actual, expected)
+
+
+# no hits or library seeds
+uc_empty = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+"""
+
+# label not in qiime post-split-libraries format
+uc_invalid_id = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+S 0 133 * * * * * 1539 *
+"""
+
+# contains single new (de novo) seed hit
+uc_minimal = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+S 0 133 * * * * * f2_1539 *
+"""
+
+# contains single library (reference) seed hit
+uc_lib_minimal = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+L 3 1389 * * * * * 295053 *
+H 3 133 100.0 + 0 0 519I133M737I f2_1539 295053
+"""
+
+# contains new seed (de novo) hits only
+uc_seed_hits = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+S 0 133 * * * * * f2_1539 *
+H 0 141 100.0 + 0 0 133M8D f3_42 f2_1539
+H 0 141 100.0 + 0 0 133M8D f2_43 f2_1539
+S 0 133 * * * * * f3_44 *
+"""
+
+# contains library (reference) and new seed (de novo) hits
+uc_mixed_hits = """# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc
+# version=1.2.22
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+S 0 133 * * * * * f2_1539 *
+H 0 141 100.0 + 0 0 133M8D f3_42 f2_1539
+H 0 141 100.0 + 0 0 133M8D f2_43 f2_1539
+S 0 133 * * * * * f3_44 *
+L 3 1389 * * * * * 295053 *
+H 3 133 100.0 + 0 0 519I133M737I f2_1539 295053
+"""
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/test_table.py b/tests/test_table.py
new file mode 100644
index 0000000..6e6f21e
--- /dev/null
+++ b/tests/test_table.py
@@ -0,0 +1,3376 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# ----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import os
+from json import loads
+from tempfile import NamedTemporaryFile
+from unittest import TestCase, main
+from io import StringIO
+
+from future.utils import viewkeys
+import numpy.testing as npt
+import numpy as np
+from scipy.sparse import lil_matrix, csr_matrix, csc_matrix
+
+from biom import example_table
+from biom.exception import UnknownAxisError, UnknownIDError, TableException
+from biom.util import unzip, HAVE_H5PY, H5PY_VLEN_STR
+from biom.table import (Table, prefer_self, index_list, list_nparray_to_sparse,
+ list_dict_to_sparse, dict_to_sparse,
+ coo_arrays_to_sparse, list_list_to_sparse,
+ nparray_to_sparse, list_sparse_to_sparse)
+from biom.parse import parse_biom_table
+from biom.err import errstate
+
+if HAVE_H5PY:
+ import h5py
+
+__author__ = "Daniel McDonald"
+__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
+__credits__ = ["Daniel McDonald", "Jai Ram Rideout", "Justin Kuczynski",
+ "Greg Caporaso", "Jose Clemente", "Adam Robbins-Pianka",
+ "Joshua Shorenstein", "Jose Antonio Navas Molina",
+ "Jorge Cañardo Alastuey"]
+__license__ = "BSD"
+__url__ = "http://biom-format.org"
+__maintainer__ = "Daniel McDonald"
+__email__ = "daniel.mcdonald at colorado.edu"
+
+
+class SupportTests(TestCase):
+
+ def test_head(self):
+ # example table is 2 x 3, so no change in contained data
+ exp = example_table
+ obs = example_table.head()
+ self.assertIsNot(obs, exp)
+ self.assertEqual(obs, exp)
+
+ def test_head_bounded(self):
+ obs = example_table.head(1)
+ from collections import defaultdict
+ exp = Table(np.array([[0., 1., 2.]]), ['O1'], ['S1', 'S2', 'S3'],
+ [{'taxonomy': ['Bacteria', 'Firmicutes']}],
+ [{'environment': 'A'}, {'environment': 'B'},
+ {'environment': 'A'}])
+
+ self.assertEqual(obs, exp)
+
+ obs = example_table.head(m=2)
+ exp = Table(np.array([[0., 1.], [3., 4.]]), ['O1', 'O2'], ['S1', 'S2'],
+ [{'taxonomy': ['Bacteria', 'Firmicutes']},
+ {'taxonomy': ['Bacteria', 'Bacteroidetes']}],
+ [{'environment': 'A'}, {'environment': 'B'}])
+ self.assertEqual(obs, exp)
+
+ def test_head_overstep(self):
+ # silently works
+ exp = example_table
+ obs = example_table.head(10000)
+ self.assertIsNot(obs, exp)
+ self.assertEqual(obs, exp)
+
+ def test_head_zero_or_neg(self):
+ with self.assertRaises(IndexError):
+ example_table.head(0)
+
+ with self.assertRaises(IndexError):
+ example_table.head(-1)
+
+ with self.assertRaises(IndexError):
+ example_table.head(m=0)
+
+ with self.assertRaises(IndexError):
+ example_table.head(m=-1)
+
+ with self.assertRaises(IndexError):
+ example_table.head(0, 5)
+
+ with self.assertRaises(IndexError):
+ example_table.head(5, 0)
+
+ def test_table_sparse_nparray(self):
+ """beat the table sparsely to death"""
+ # nparray test
+ samp_ids = ['1', '2', '3', '4']
+ obs_ids = ['a', 'b', 'c']
+ nparray = np.array([[1, 2, 3, 4], [-1, 6, 7, 8], [9, 10, 11, 12]])
+ data = nparray_to_sparse(
+ np.array([[1, 2, 3, 4], [-1, 6, 7, 8], [9, 10, 11, 12]]))
+ exp = Table(data, obs_ids, samp_ids)
+ obs = Table(nparray, obs_ids, samp_ids)
+ self.assertEqual(obs, exp)
+
+ def test_table_sparse_list_nparray(self):
+ """beat the table sparsely to death"""
+ # list of nparray test
+ samp_ids = ['1', '2', '3', '4']
+ obs_ids = ['a', 'b', 'c']
+ list_np = [np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8]),
+ np.array([9, 10, 11, 12])]
+ data = list_nparray_to_sparse(list_np)
+ exp = Table(data, obs_ids, samp_ids)
+ obs = Table(list_np, obs_ids, samp_ids)
+ self.assertEqual(obs, exp)
+
+ def test_table_sparse_dict(self):
+ """beat the table sparsely to death"""
+ # dict test
+ samp_ids = range(24)
+ obs_ids = range(101)
+ dict_input = {(0, 0): 1, (0, 10): 5, (100, 23): -3}
+ d_input = np.zeros((101, 24), dtype=float)
+ d_input[0, 0] = 1
+ d_input[0, 10] = 5
+ d_input[100, 23] = -3
+ data = nparray_to_sparse(d_input)
+ exp = Table(data, obs_ids, samp_ids)
+ obs = Table(dict_input, obs_ids, samp_ids)
+ self.assertEqual(obs, exp)
+
+ def test_table_sparse_list_dict(self):
+ """beat the table sparsely to death"""
+ # list of dict test
+ samp_ids = range(11)
+ obs_ids = range(3)
+ ld_input = np.zeros((3, 11), dtype=float)
+ ld_input[0, 5] = 10
+ ld_input[0, 10] = 2
+ ld_input[1, 1] = 15
+ ld_input[2, 3] = 7
+ data = nparray_to_sparse(ld_input)
+ exp = Table(data, obs_ids, samp_ids)
+ list_dict = [{(0, 5): 10, (10, 10): 2}, {(0, 1): 15}, {(0, 3): 7}]
+ obs = Table(list_dict, obs_ids, samp_ids)
+ self.assertEqual(obs, exp)
+
+ def test_table_sparse_list_list(self):
+ """beat the table sparsely to death"""
+ # list list test
+ samp_ids = range(3)
+ obs_ids = range(2)
+ exp_data = lil_matrix((2, 3))
+ exp_data[0, 1] = 5
+ exp_data[1, 2] = 10
+ exp = Table(exp_data, obs_ids, samp_ids)
+ input_ = [[0, 1, 5], [1, 2, 10]]
+ obs = Table(input_, obs_ids, samp_ids)
+ self.assertEqual(obs, exp)
+
+ def test_table_exception(self):
+ """Make sure a TableException can be raised"""
+ def f():
+ raise TableException
+ self.assertRaises(TableException, f)
+
+ def test_prefer_self(self):
+ """prefer x"""
+ exp = 1
+ obs = prefer_self(1, 2)
+ self.assertEqual(obs, exp)
+
+ exp = 2
+ obs = prefer_self(None, 2)
+ self.assertEqual(obs, exp)
+
+ exp = None
+ obs = prefer_self(None, None)
+ self.assertEqual(obs, exp)
+
+ def test_index_list(self):
+ """returns a dict for list lookups"""
+ exp = {'a': 2, 'b': 0, 'c': 1}
+ obs = index_list(['b', 'c', 'a'])
+ self.assertEqual(obs, exp)
+
+
+class TableTests(TestCase):
+
+ def setUp(self):
+ self.simple_derived = Table(
+ np.array([[5, 6], [7, 8]]), [3, 4], [1, 2])
+ self.vals = {(0, 0): 5, (0, 1): 6, (1, 0): 7, (1, 1): 8}
+ self.st1 = Table(self.vals, ['1', '2'], ['a', 'b'])
+ self.st2 = Table(self.vals, ['1', '2'], ['a', 'b'])
+ self.vals3 = {(0, 0): 1, (0, 1): 2, (1, 0): 3, (1, 1): 4}
+ self.vals4 = {(0, 0): 1, (0, 1): 2, (1, 0): 3, (1, 1): 4}
+ self.st3 = Table(self.vals3, ['2', '3'], ['b', 'c'])
+ self.st4 = Table(self.vals4, ['3', '4'], ['c', 'd'])
+ self.st_rich = Table(self.vals,
+ ['1', '2'], ['a', 'b'],
+ [{'taxonomy': ['k__a', 'p__b']},
+ {'taxonomy': ['k__a', 'p__c']}],
+ [{'barcode': 'aatt'}, {'barcode': 'ttgg'}],
+ )
+ self.st_group_rich = Table(
+ self.vals, ['1', '2'], ['a', 'b'],
+ [{'taxonomy': ['k__a', 'p__b']}, {'taxonomy': ['k__a', 'p__c']}],
+ [{'barcode': 'aatt'}, {'barcode': 'ttgg'}],
+ observation_group_metadata={'tree': ('newick', '(a:0.3,b:0.4);')},
+ sample_group_metadata={'category': ('newick', '(1:0.3,2:0.4);')}
+ )
+
+ self.empty_st = Table([], [], [])
+
+ self.vals5 = {(0, 1): 2, (1, 1): 4}
+ self.st5 = Table(self.vals5, ['5', '6'], ['a', 'b'])
+
+ self.vals6 = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 0}
+ self.st6 = Table(self.vals6, ['5', '6'], ['a', 'b'])
+
+ self.vals7 = {(0, 0): 5, (0, 1): 7, (1, 0): 8, (1, 1): 0}
+ self.st7 = Table(self.vals7, ['5', '6'], ['a', 'b'])
+
+ self.single_sample_st = Table(
+ np.array([[2.0], [0.0], [1.0]]), ['O1', 'O2', 'O3'],
+ ['S1'])
+ self.single_obs_st = Table(np.array([[2.0, 0.0, 1.0]]),
+ ['01'], ['S1', 'S2', 'S3'])
+
+ self.to_remove = []
+
+ # 1 0 2
+ # 3 0 4
+ self.mat1 = Table(np.array([[1, 0, 2], [3, 0, 4]]),
+ ['o1', 'o2'], ['s1', 's2', 's3'])
+
+ # Empty/null cases (i.e., 0x0, 0xn, nx0).
+ ids = lambda X: ['x%d' % e for e in range(0, X)]
+ self.null1 = Table(np.zeros((0, 0)), [], [])
+ self.null2 = Table(
+ np.zeros((0, 42), dtype=float), [], ids(42))
+ self.null3 = Table(
+ np.zeros((42, 0), dtype=float), ids(42), [])
+ self.nulls = [self.null1, self.null2, self.null3]
+
+ # 0 0
+ # 0 0
+ self.empty = Table(np.zeros((2, 2)), ids(2), ids(2))
+
+ # 1 0 3
+ h = np.array([[1.0, 0.0, 3.0]])
+ self.row_vec = Table(h, ids(1), ids(3))
+
+ # 1
+ # 0
+ # 3
+ h = np.array([[1], [0], [3]])
+ self.col_vec = Table(h, ids(3), ids(1))
+
+ # 1x1
+ h = np.array([[42]])
+ self.single_ele = Table(h, ['b'], ['a'])
+
+ # Explicit zeros.
+ self.explicit_zeros = Table(np.array([[0, 0, 1], [1, 0, 0],
+ [1, 0, 2]]),
+ ['a', 'b', 'c'], ['x', 'y', 'z'])
+
+ def tearDown(self):
+ if self.to_remove:
+ for f in self.to_remove:
+ os.remove(f)
+
+ def test_data_property(self):
+ exp = self.simple_derived._data
+ obs = self.simple_derived.matrix_data
+ self.assertEqual((obs != exp).nnz, 0)
+
+ with self.assertRaises(AttributeError):
+ self.simple_derived.matrix_data = 'foo'
+
+ def test_repr(self):
+ """__repr__ method of biom.table.Table"""
+ # table
+ data = np.asarray([[0, 0, 0], [0, 1, 0], [0, 0, 0]])
+ t = Table(data, ['a', 'b', 'c'], ['x', 'y', 'z'])
+ self.assertEqual("3 x 3 <class 'biom.table.Table'> with 1 nonzero "
+ "entries (11% dense)", repr(t))
+
+ # empty table
+ data = np.asarray([[]])
+ t = Table(data, [], [])
+ self.assertEqual("0 x 0 <class 'biom.table.Table'> with 0 nonzero "
+ "entries (0% dense)", repr(t))
+
+ def test_init_with_nparray(self):
+ """to_sparse in constructor should be triggered"""
+ data = np.array([[1, 2], [3, 4]])
+ samp_ids = ['a', 'b']
+ obs_ids = ['1', '2']
+ exp = Table(data, obs_ids, samp_ids)
+ obs = Table(data, obs_ids, samp_ids)
+ self.assertEqual(obs, exp)
+
+ def test_min_observation(self):
+ exp = np.array([5, 7])
+ obs = self.simple_derived.min('observation')
+ npt.assert_equal(obs, exp)
+
+ def test_min_sample(self):
+ exp = np.array([5, 6])
+ obs = self.simple_derived.min('sample')
+ npt.assert_equal(obs, exp)
+
+ def test_min_whole(self):
+ exp = 5
+ obs = self.simple_derived.min('whole')
+ npt.assert_equal(obs, exp)
+
+ def test_max_observation(self):
+ exp = np.array([6, 8])
+ obs = self.simple_derived.max('observation')
+ npt.assert_equal(obs, exp)
+
+ def test_max_sample(self):
+ exp = np.array([7, 8])
+ obs = self.simple_derived.max('sample')
+ npt.assert_equal(obs, exp)
+
+ def test_max_whole(self):
+ exp = 8
+ obs = self.simple_derived.max('whole')
+ npt.assert_equal(obs, exp)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5_empty_md(self):
+ """Parse a hdf5 formatted BIOM table w/o metadata"""
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ t = Table.from_hdf5(h5py.File('test_data/empty.biom'))
+ os.chdir(cwd)
+
+ self.assertTrue(t._sample_metadata is None)
+ self.assertTrue(t._observation_metadata is None)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5_custom_parsers(self):
+ def parser(item):
+ return item.upper()
+ parse_fs = {'BODY_SITE': parser}
+
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ t = Table.from_hdf5(h5py.File('test_data/test.biom'),
+ parse_fs=parse_fs)
+ os.chdir(cwd)
+
+ for m in t.metadata():
+ self.assertIn(m['BODY_SITE'], ('GUT', 'SKIN'))
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5(self):
+ """Parse a hdf5 formatted BIOM table"""
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ t = Table.from_hdf5(h5py.File('test_data/test.biom'))
+ os.chdir(cwd)
+
+ npt.assert_equal(t.ids(), (u'Sample1', u'Sample2', u'Sample3',
+ u'Sample4', u'Sample5', u'Sample6'))
+ npt.assert_equal(t.ids(axis='observation'),
+ (u'GG_OTU_1', u'GG_OTU_2', u'GG_OTU_3',
+ u'GG_OTU_4', u'GG_OTU_5'))
+ exp_obs_md = ({u'taxonomy': [u'k__Bacteria',
+ u'p__Proteobacteria',
+ u'c__Gammaproteobacteria',
+ u'o__Enterobacteriales',
+ u'f__Enterobacteriaceae',
+ u'g__Escherichia',
+ u's__']},
+ {u'taxonomy': [u'k__Bacteria',
+ u'p__Cyanobacteria',
+ u'c__Nostocophycideae',
+ u'o__Nostocales',
+ u'f__Nostocaceae',
+ u'g__Dolichospermum',
+ u's__']},
+ {u'taxonomy': [u'k__Archaea',
+ u'p__Euryarchaeota',
+ u'c__Methanomicrobia',
+ u'o__Methanosarcinales',
+ u'f__Methanosarcinaceae',
+ u'g__Methanosarcina',
+ u's__']},
+ {u'taxonomy': [u'k__Bacteria',
+ u'p__Firmicutes',
+ u'c__Clostridia',
+ u'o__Halanaerobiales',
+ u'f__Halanaerobiaceae',
+ u'g__Halanaerobium',
+ u's__Halanaerobiumsaccharolyticum']},
+ {u'taxonomy': [u'k__Bacteria',
+ u'p__Proteobacteria',
+ u'c__Gammaproteobacteria',
+ u'o__Enterobacteriales',
+ u'f__Enterobacteriaceae',
+ u'g__Escherichia',
+ u's__']})
+ self.assertEqual(t._observation_metadata, exp_obs_md)
+
+ exp_samp_md = ({u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CGCTTATCGAGA',
+ u'Description': u'human gut',
+ u'BODY_SITE': u'gut'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CATACCAGTAGC',
+ u'Description': u'human gut',
+ u'BODY_SITE': u'gut'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTCTCTACCTGT',
+ u'Description': u'human gut',
+ u'BODY_SITE': u'gut'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTCTCGGCCTGT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTCTCTACCAAT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTAACTACCAAT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'})
+ self.assertEqual(t._sample_metadata, exp_samp_md)
+
+ exp = [np.array([0., 0., 1., 0., 0., 0.]),
+ np.array([5., 1., 0., 2., 3., 1.]),
+ np.array([0., 0., 1., 4., 0., 2.]),
+ np.array([2., 1., 1., 0., 0., 1.]),
+ np.array([0., 1., 1., 0., 0., 0.])]
+ npt.assert_equal(list(t.iter_data(axis="observation")), exp)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5_sample_subset(self):
+ """Parse a sample subset of a hdf5 formatted BIOM table"""
+ samples = [u'Sample2', u'Sample4', u'Sample6']
+
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ t = Table.from_hdf5(h5py.File('test_data/test.biom'), ids=samples)
+ os.chdir(cwd)
+
+ npt.assert_equal(t.ids(), [u'Sample2', u'Sample4', u'Sample6'])
+ npt.assert_equal(t.ids(axis='observation'),
+ [u'GG_OTU_2', u'GG_OTU_3', u'GG_OTU_4', u'GG_OTU_5'])
+ exp_obs_md = ({u'taxonomy': [u'k__Bacteria',
+ u'p__Cyanobacteria',
+ u'c__Nostocophycideae',
+ u'o__Nostocales',
+ u'f__Nostocaceae',
+ u'g__Dolichospermum',
+ u's__']},
+ {u'taxonomy': [u'k__Archaea',
+ u'p__Euryarchaeota',
+ u'c__Methanomicrobia',
+ u'o__Methanosarcinales',
+ u'f__Methanosarcinaceae',
+ u'g__Methanosarcina',
+ u's__']},
+ {u'taxonomy': [u'k__Bacteria',
+ u'p__Firmicutes',
+ u'c__Clostridia',
+ u'o__Halanaerobiales',
+ u'f__Halanaerobiaceae',
+ u'g__Halanaerobium',
+ u's__Halanaerobiumsaccharolyticum']},
+ {u'taxonomy': [u'k__Bacteria',
+ u'p__Proteobacteria',
+ u'c__Gammaproteobacteria',
+ u'o__Enterobacteriales',
+ u'f__Enterobacteriaceae',
+ u'g__Escherichia',
+ u's__']})
+ self.assertEqual(t._observation_metadata, exp_obs_md)
+
+ exp_samp_md = ({u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CATACCAGTAGC',
+ u'Description': u'human gut',
+ u'BODY_SITE': u'gut'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTCTCGGCCTGT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTAACTACCAAT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'})
+ self.assertEqual(t._sample_metadata, exp_samp_md)
+
+ exp = [np.array([1., 2., 1.]),
+ np.array([0., 4., 2.]),
+ np.array([1., 0., 1.]),
+ np.array([1., 0., 0.])]
+ npt.assert_equal(list(t.iter_data(axis='observation')), exp)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5_observation_subset(self):
+ """Parse a observation subset of a hdf5 formatted BIOM table"""
+ observations = [u'GG_OTU_1', u'GG_OTU_3', u'GG_OTU_5']
+
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ t = Table.from_hdf5(h5py.File('test_data/test.biom'),
+ ids=observations, axis='observation')
+ os.chdir(cwd)
+
+ npt.assert_equal(t.ids(), [u'Sample2', u'Sample3', u'Sample4',
+ u'Sample6'])
+ npt.assert_equal(t.ids(axis='observation'),
+ [u'GG_OTU_1', u'GG_OTU_3', u'GG_OTU_5'])
+ exp_obs_md = ({u'taxonomy': [u'k__Bacteria',
+ u'p__Proteobacteria',
+ u'c__Gammaproteobacteria',
+ u'o__Enterobacteriales',
+ u'f__Enterobacteriaceae',
+ u'g__Escherichia',
+ u's__']},
+ {u'taxonomy': [u'k__Archaea',
+ u'p__Euryarchaeota',
+ u'c__Methanomicrobia',
+ u'o__Methanosarcinales',
+ u'f__Methanosarcinaceae',
+ u'g__Methanosarcina',
+ u's__']},
+ {u'taxonomy': [u'k__Bacteria',
+ u'p__Proteobacteria',
+ u'c__Gammaproteobacteria',
+ u'o__Enterobacteriales',
+ u'f__Enterobacteriaceae',
+ u'g__Escherichia',
+ u's__']})
+ self.assertEqual(t._observation_metadata, exp_obs_md)
+
+ exp_samp_md = ({u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CATACCAGTAGC',
+ u'Description': u'human gut',
+ u'BODY_SITE': u'gut'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTCTCTACCTGT',
+ u'Description': u'human gut',
+ u'BODY_SITE': u'gut'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTCTCGGCCTGT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'},
+ {u'LinkerPrimerSequence': u'CATGCTGCCTCCCGTAGGAGT',
+ u'BarcodeSequence': u'CTAACTACCAAT',
+ u'Description': u'human skin',
+ u'BODY_SITE': u'skin'})
+ self.assertEqual(t._sample_metadata, exp_samp_md)
+
+ exp = [np.array([0., 1., 0., 0.]),
+ np.array([0., 1., 4., 2.]),
+ np.array([1., 1., 0., 0.])]
+ npt.assert_equal(list(t.iter_data(axis='observation')), exp)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5_subset_error(self):
+ """hdf5 biom table parse throws error with invalid parameters"""
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+
+ # Raises an error if not all the given samples are in the OTU table
+ with self.assertRaises(ValueError):
+ Table.from_hdf5(h5py.File('test_data/test.biom'),
+ ids=['Sample2', 'DoesNotExist', 'Sample6'])
+
+ # Raises an error if not all the given observation are in the OTU table
+ with self.assertRaises(ValueError):
+ Table.from_hdf5(h5py.File('test_data/test.biom'),
+ ids=['GG_OTU_1', 'DoesNotExist'],
+ axis='observation')
+ os.chdir(cwd)
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_from_hdf5_empty_table(self):
+ """HDF5 biom parse successfully loads an empty table"""
+ cwd = os.getcwd()
+ if '/' in __file__:
+ os.chdir(__file__.rsplit('/', 1)[0])
+ t = Table.from_hdf5(h5py.File('test_data/empty.biom'))
+ os.chdir(cwd)
+
+ npt.assert_equal(t.ids(), [])
+ npt.assert_equal(t.ids(axis='observation'), [])
+ self.assertEqual(t._observation_metadata, None)
+ self.assertEqual(t._sample_metadata, None)
+ npt.assert_equal(list(t.iter_data(axis='observation')), [])
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_to_hdf5_empty_table(self):
+ """Successfully writes an empty OTU table in HDF5 format"""
+ # Create an empty OTU table
+ t = Table([], [], [])
+ with NamedTemporaryFile() as tmpfile:
+ h5 = h5py.File(tmpfile.name, 'w')
+ t.to_hdf5(h5, 'tests')
+ h5.close()
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_to_hdf5_error(self):
+ """Errors if a controlled category is not correctly formatted"""
+ with NamedTemporaryFile() as tmpfile:
+ h5 = h5py.File(tmpfile.name, 'w')
+ t = Table(
+ np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
+ ['1', '2', '3'], ['a', 'b', 'c'],
+ [{'taxonomy': 'k__a; p__b'},
+ {'taxonomy': 'k__a; p__c'},
+ {'taxonomy': 'k__a; p__c'}],
+ [{'barcode': 'aatt'},
+ {'barcode': 'ttgg'},
+ {'barcode': 'aatt'}])
+ with self.assertRaises(TypeError):
+ t.to_hdf5(h5, 'tests')
+ h5.close()
+
    @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
    def test_to_hdf5_custom_formatters(self):
        """A user-supplied formatter overrides the default for its category."""
        self.st_rich = Table(self.vals,
                             ['1', '2'], ['a', 'b'],
                             [{'taxonomy': ['k__a', 'p__b']},
                              {'taxonomy': ['k__a', 'p__c']}],
                             [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])

        # Custom formatter that upper-cases barcodes before storing them,
        # so the round-tripped values differ from the originals only in case.
        def bc_formatter(grp, category, md, compression):
            name = 'metadata/%s' % category
            data = np.array([m[category].upper().encode('utf8') for m in md])
            grp.create_dataset(name, shape=data.shape, dtype=H5PY_VLEN_STR,
                               data=data, compression=compression)

        with NamedTemporaryFile() as tmpfile:
            h5 = h5py.File(tmpfile.name, 'w')
            self.st_rich.to_hdf5(h5, 'tests',
                                 format_fs={'barcode': bc_formatter})
            h5.close()

            h5 = h5py.File(tmpfile.name, 'r')
            self.assertIn('observation', h5)
            self.assertIn('sample', h5)
            self.assertEqual(sorted(h5.attrs.keys()), sorted(['id', 'type',
                                                              'format-url',
                                                              'format-version',
                                                              'generated-by',
                                                              'creation-date',
                                                              'shape', 'nnz']))

            obs = Table.from_hdf5(h5)
            # Stored barcodes were upper-cased by the formatter, so they no
            # longer equal the originals but do match after lower-casing.
            for m1, m2 in zip(obs.metadata(), self.st_rich.metadata()):
                self.assertNotEqual(m1['barcode'], m2['barcode'])
                self.assertEqual(m1['barcode'].lower(), m2['barcode'])
            h5.close()
+
+ @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
+ def test_to_hdf5(self):
+ """Write a file"""
+ with NamedTemporaryFile() as tmpfile:
+ h5 = h5py.File(tmpfile.name, 'w')
+ self.st_rich.to_hdf5(h5, 'tests')
+ h5.close()
+
+ h5 = h5py.File(tmpfile.name, 'r')
+ self.assertIn('observation', h5)
+ self.assertIn('sample', h5)
+ self.assertEqual(sorted(h5.attrs.keys()), sorted(['id', 'type',
+ 'format-url',
+ 'format-version',
+ 'generated-by',
+ 'creation-date',
+ 'shape', 'nnz']))
+
+ obs = Table.from_hdf5(h5)
+ self.assertEqual(obs, self.st_rich)
+ h5.close()
+
+ # Test with a collapsed table
+ with NamedTemporaryFile() as tmpfile:
+ h5 = h5py.File(tmpfile.name, 'w')
+ dt_rich = Table(
+ np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
+ ['1', '2', '3'], ['a', 'b', 'c'],
+ [{'taxonomy': ['k__a', 'p__b']},
+ {'taxonomy': ['k__a', 'p__c']},
+ {'taxonomy': ['k__a', 'p__c']}],
+ [{'barcode': 'aatt'},
+ {'barcode': 'ttgg'},
+ {'barcode': 'aatt'}])
+ bin_f = lambda id_, x: x['barcode']
+ collapsed = dt_rich.collapse(
+ bin_f, norm=False, min_group_size=1,
+ axis='sample').sort(axis='sample')
+ collapsed.to_hdf5(h5, 'tests')
+ h5.close()
+
+ h5 = h5py.File(tmpfile.name, 'r')
+ self.assertIn('observation', h5)
+ self.assertIn('sample', h5)
+ self.assertEqual(sorted(h5.attrs.keys()), sorted(['id', 'type',
+ 'format-url',
+ 'format-version',
+ 'generated-by',
+ 'creation-date',
+ 'shape', 'nnz']))
+
+ obs = Table.from_hdf5(h5)
+ h5.close()
+
+ exp = Table(
+ np.array([[12, 6], [18, 9], [24, 12]]),
+ ['1', '2', '3'], ['aatt', 'ttgg'],
+ [{'taxonomy': ['k__a', 'p__b']},
+ {'taxonomy': ['k__a', 'p__c']},
+ {'taxonomy': ['k__a', 'p__c']}],
+ [{'collapsed_ids': ['a', 'c']},
+ {'collapsed_ids': ['b']}])
+ self.assertEqual(obs, exp)
+
+ # Test with table having a None on taxonomy
+ with NamedTemporaryFile() as tmpfile:
+ h5 = h5py.File(tmpfile.name, 'w')
+ t = Table(self.vals, ['1', '2'], ['a', 'b'],
+ [{'taxonomy': ['k__a', 'p__b']},
+ {'taxonomy': None}],
+ [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])
+ t.to_hdf5(h5, 'tests')
+ h5.close()
+
+ h5 = h5py.File(tmpfile.name, 'r')
+ self.assertIn('observation', h5)
+ self.assertIn('sample', h5)
+ self.assertEqual(sorted(h5.attrs.keys()), sorted(['id', 'type',
+ 'format-url',
+ 'format-version',
+ 'generated-by',
+ 'creation-date',
+ 'shape', 'nnz']))
+
+ obs = Table.from_hdf5(h5)
+ h5.close()
+ self.assertEqual(obs, t)
+
+ def test_from_tsv(self):
+ tab1_fh = StringIO(otu_table1)
+ sparse_rich = Table.from_tsv(tab1_fh, None, None,
+ OBS_META_TYPES['naive'])
+ self.assertEqual(sorted(sparse_rich.ids()),
+ sorted(['Fing', 'Key', 'NA']))
+ self.assertEqual(sorted(sparse_rich.ids(axis='observation')),
+ list(map(str, [0, 1, 3, 4, 7])))
+ for i, obs_id in enumerate(sparse_rich.ids(axis='observation')):
+ if obs_id == '0':
+ self.assertEqual(sparse_rich._observation_metadata[i],
+ {'Consensus Lineage': 'Bacteria; '
+ 'Actinobacteria; Actinobacteridae; '
+ 'Propionibacterineae; '
+ 'Propionibacterium'})
+ elif obs_id == '1':
+ self.assertEqual(sparse_rich._observation_metadata[i],
+ {'Consensus Lineage': 'Bacteria; Firmicutes; '
+ 'Alicyclobacillaceae; Bacilli; '
+ 'Lactobacillales; Lactobacillales; '
+ 'Streptococcaceae; '
+ 'Streptococcus'})
+ elif obs_id == '7':
+ self.assertEqual(sparse_rich._observation_metadata[i],
+ {'Consensus Lineage': 'Bacteria; '
+ 'Actinobacteria; Actinobacteridae; '
+ 'Gordoniaceae; '
+ 'Corynebacteriaceae'})
+ elif obs_id in ['3', '4']:
+ pass # got lazy
+ else:
+ raise RuntimeError('obs_id incorrect?')
+
+ self.assertEquals(sparse_rich._sample_metadata, None)
+
+ for i, obs_id in enumerate(sparse_rich.ids(axis='observation')):
+ for j, sample_id in enumerate(sparse_rich.ids()):
+ if obs_id == '1' and sample_id == 'Key':
+ # should test some abundance data
+ self.assertEqual(True, True)
+
+ def test_from_tsv_dense(self):
+ tab1_fh = StringIO(otu_table1)
+ sparse_rich = Table.from_tsv(tab1_fh.readlines(), None, None,
+ OBS_META_TYPES['naive'])
+ self.assertEqual(sorted(sparse_rich.ids()),
+ sorted(['Fing', 'Key', 'NA']))
+ self.assertEqual(sorted(sparse_rich.ids(axis='observation')),
+ list(map(str, [0, 1, 3, 4, 7])))
+ for i, obs_id in enumerate(sparse_rich.ids(axis='observation')):
+ if obs_id == '0':
+ self.assertEqual(sparse_rich._observation_metadata[i],
+ {'Consensus Lineage': 'Bacteria; '
+ 'Actinobacteria; Actinobacteridae; '
+ 'Propionibacterineae; '
+ 'Propionibacterium'})
+ elif obs_id == '1':
+ self.assertEqual(sparse_rich._observation_metadata[i],
+ {'Consensus Lineage': 'Bacteria; Firmicutes; '
+ 'Alicyclobacillaceae; Bacilli; '
+ 'Lactobacillales; Lactobacillales; '
+ 'Streptococcaceae; '
+ 'Streptococcus'})
+ elif obs_id == '7':
+ self.assertEqual(sparse_rich._observation_metadata[i],
+ {'Consensus Lineage': 'Bacteria; '
+ 'Actinobacteria; Actinobacteridae; '
+ 'Gordoniaceae; '
+ 'Corynebacteriaceae'})
+ elif obs_id in ['3', '4']:
+ pass # got lazy
+ else:
+ raise RuntimeError('obs_id incorrect?')
+
+ self.assertEquals(sparse_rich._sample_metadata, None)
+
+ for i, obs_id in enumerate(sparse_rich.ids(axis='observation')):
+ for j, sample_id in enumerate(sparse_rich.ids()):
+ if obs_id == '1' and sample_id == 'Key':
+ self.assertEqual(True, True)
+ # should test some abundance data
+
+ def test_to_tsv(self):
+ """Print out self in a delimited form"""
+ exp = '\n'.join(
+ ["# Constructed from biom file",
+ "#OTU ID\ta\tb",
+ "1\t5.0\t6.0",
+ "2\t7.0\t8.0"])
+ obs = self.st1.delimited_self()
+ self.assertEqual(obs, exp)
+
+ # Test observation_column_name.
+ exp = '\n'.join(
+ ["# Constructed from biom file",
+ "Taxon\ta\tb",
+ "1\t5.0\t6.0",
+ "2\t7.0\t8.0"])
+ obs = self.st1.to_tsv(observation_column_name='Taxon')
+ self.assertEqual(obs, exp)
+
+ def test_group_metadata_sample(self):
+ """Returns the sample group metadata"""
+ self.assertEqual(self.st_group_rich.group_metadata(),
+ {'category': ('newick', '(1:0.3,2:0.4);')})
+ self.assertEqual(self.st_rich.group_metadata(), None)
+
+ def test_group_metadata_observation(self):
+ """Returns the observation group metadata"""
+ self.assertEqual(self.st_group_rich.group_metadata(axis='observation'),
+ {'tree': ('newick', '(a:0.3,b:0.4);')})
+ self.assertEqual(self.st_rich.group_metadata(axis='observation'), None)
+
+ def test_group_metadata_error(self):
+ """Handles invalid input"""
+ with self.assertRaises(UnknownAxisError):
+ self.simple_derived.group_metadata('bro-axis')
+
+ def test_metadata_invalid_input(self):
+ """Correctly handles invalid input."""
+ with self.assertRaises(UnknownAxisError):
+ self.simple_derived.metadata(1, 'brofist')
+
+ def test_metadata_sample_id(self):
+ """returns the sample metadata for a given id"""
+ self.assertEqual({'barcode': 'aatt'},
+ self.st_rich.metadata('a'))
+ self.assertEqual({'barcode': 'ttgg'},
+ self.st_rich.metadata('b'))
+
+ with self.assertRaises(UnknownIDError):
+ self.st_rich.metadata(3, 'sample')
+
+ def test_metadata_sample(self):
+ """Return the sample metadata"""
+ obs = self.st_rich.metadata()
+ exp = [{'barcode': 'aatt'}, {'barcode': 'ttgg'}]
+ for o, e in zip(obs, exp):
+ self.assertDictEqual(o, e)
+
+ def test_metadata_observation_id(self):
+ """returns the observation metadata for a given id"""
+ self.assertEqual({'taxonomy': ['k__a', 'p__b']},
+ self.st_rich.metadata('1', 'observation'))
+ self.assertEqual({'taxonomy': ['k__a', 'p__c']},
+ self.st_rich.metadata('2', 'observation'))
+
+ with self.assertRaises(UnknownIDError):
+ self.simple_derived.metadata('3', 'observation')
+
+ def test_metadata_observation(self):
+ """returns the observation metadata"""
+ obs = self.st_rich.metadata(axis='observation')
+ exp = [{'taxonomy': ['k__a', 'p__b']}, {'taxonomy': ['k__a', 'p__c']}]
+ for o, e in zip(obs, exp):
+ self.assertDictEqual(o, e)
+
+ def test_index_invalid_input(self):
+ """Correctly handles invalid input."""
+ with self.assertRaises(UnknownAxisError):
+ self.simple_derived.index(1, 'brofist')
+
+ def test_index_sample(self):
+ """returns the sample index"""
+ self.assertEqual(0, self.simple_derived.index(1, 'sample'))
+ self.assertEqual(1, self.simple_derived.index(2, 'sample'))
+
+ with self.assertRaises(UnknownIDError):
+ self.simple_derived.index(3, 'sample')
+
+ def test_index_observation(self):
+ """returns the observation index"""
+ self.assertEqual(0, self.simple_derived.index(3, 'observation'))
+ self.assertEqual(1, self.simple_derived.index(4, 'observation'))
+
+ with self.assertRaises(UnknownIDError):
+ self.simple_derived.index(5, 'observation')
+
+ def test_index_ids(self):
+ """Index all the ids!!!"""
+ exp_samp = {1: 0, 2: 1}
+ exp_obs = {3: 0, 4: 1}
+ self.assertEqual(self.simple_derived._sample_index, exp_samp)
+ self.assertEqual(self.simple_derived._obs_index, exp_obs)
+
+ def test_sample_exists(self):
+ """Verify samples exist!"""
+ self.assertTrue(self.simple_derived.exists(1))
+ self.assertTrue(self.simple_derived.exists(2))
+ self.assertFalse(self.simple_derived.exists(3))
+
+ def test_observation_exists(self):
+ """Verify observation exist!"""
+ self.assertTrue(self.simple_derived.exists(3, axis="observation"))
+ self.assertTrue(self.simple_derived.exists(4, axis="observation"))
+ self.assertFalse(self.simple_derived.exists(2, axis="observation"))
+
+ def test_exists_invalid_axis(self):
+ """Verify ValueError raised!"""
+ with self.assertRaises(UnknownAxisError):
+ self.simple_derived.exists(3, axis="fooz")
+
+ def test_union_id_order(self):
+ """Combine unique ids, union"""
+ a = [1, 2, 3, 4]
+ b = [3, 4, 5, 6, 0, 'a']
+ exp = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 0: 6, 'a': 7}
+ obs = self.st1._union_id_order(a, b)
+ self.assertEqual(obs, exp)
+
+ def test_intersect_id_order(self):
+ """Combine ids, intersection"""
+ a = [1, 2, 3, 4]
+ b = [3, 4, 5, 6, 0, 'a']
+ exp = {3: 0, 4: 1}
+ obs = self.st1._intersect_id_order(a, b)
+ self.assertEqual(obs, exp)
+
    def test_verify_metadata(self):
        """Make sure the metadata is sane (including obs/sample ids)"""
        # Valid, matching-length metadata: construction must not raise.
        obs_ids = [1, 2, 3]
        obs_md = [{'a': 0}, {'b': 0}, {'c': 0}]
        samp_ids = [4, 5, 6, 7]
        samp_md = [{'d': 0}, {'e': 0}, {'f': 0}, {'g': 0}]
        d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
        Table(d, obs_ids, samp_ids, obs_md, samp_md)
        # test is that no exception is raised

        # Wrong number of observation ids for a 3-row matrix.
        obs_ids = [1, 2]
        self.assertRaises(TableException, Table, d, samp_ids, obs_ids, samp_md,
                          obs_md)

        # Wrong number of sample ids for a 4-column matrix.
        obs_ids = [1, 2, 3]
        samp_ids = [4, 5, 6]
        self.assertRaises(TableException, Table, d, samp_ids, obs_ids, samp_md,
                          obs_md)

        # Observation metadata length does not match the observation count.
        samp_ids = [4, 5, 6, 7]
        obs_md = ['a', 'b']
        self.assertRaises(TableException, Table, d, samp_ids, obs_ids, samp_md,
                          obs_md)

        # Sample metadata length does not match the sample count.
        obs_md = ['a', 'b', 'c']
        samp_md = ['d', 'e', 'f']
        self.assertRaises(TableException, Table, d, samp_ids, obs_ids, samp_md,
                          obs_md)

        obs_md = None
        samp_md = None

        # test is that no exception is raised
        Table(d, obs_ids, samp_ids, obs_md, samp_md)

        # do not allow duplicate ids
        obs_ids = [1, 1, 3]
        samp_ids = [4, 5, 6]
        self.assertRaises(TableException, Table, d, samp_ids, obs_ids, samp_md,
                          obs_md)

        obs_ids = [1, 2, 3]
        samp_ids = [4, 4, 6]
        self.assertRaises(TableException, Table, d, samp_ids, obs_ids, samp_md,
                          obs_md)
+
+ def test_cast_metadata(self):
+ """Cast metadata objects to defaultdict to support default values"""
+ obs_ids = [1, 2, 3]
+ obs_md = [{'a': 1}, {'b': 2}, {'c': 3}]
+ samp_ids = [4, 5, 6, 7]
+ samp_md = [{'d': 1}, None, {'f': 3}, {'g': 4}]
+ d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+ t = Table(d, obs_ids, samp_ids, obs_md, samp_md)
+
+ self.assertEqual(t._sample_metadata[0]['non existent key'], None)
+ self.assertEqual(t._sample_metadata[1]['non existent key'], None)
+ self.assertEqual(t._sample_metadata[2]['non existent key'], None)
+ self.assertEqual(t._sample_metadata[3]['non existent key'], None)
+ self.assertEqual(t._observation_metadata[0]['non existent key'], None)
+ self.assertEqual(t._observation_metadata[1]['non existent key'], None)
+ self.assertEqual(t._observation_metadata[2]['non existent key'], None)
+
+ def test_add_group_metadata(self):
+ """add group metadata works correctly"""
+ obs_ids = [1, 2, 3]
+ samp_ids = [4, 5, 6, 7]
+ d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+ obs_g_md = {'tree': ('newick', '(1:0.2,(2:0.3,3:0.4):0.5);')}
+ sample_g_md = {'graph': ('edge_list', '(4,5), (4,6), (5,7), (6,7)')}
+ t = Table(d, obs_ids, samp_ids, observation_group_metadata=None,
+ sample_group_metadata=sample_g_md)
+ t.add_group_metadata(obs_g_md, axis='observation')
+ self.assertEqual(t.group_metadata(axis='observation'),
+ {'tree': ('newick', '(1:0.2,(2:0.3,3:0.4):0.5);')})
+
+ def test_add_group_metadata_w_existing_metadata(self):
+ """add group metadata works with existing metadata"""
+ obs_ids = [1, 2, 3]
+ samp_ids = [4, 5, 6, 7]
+ d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+ obs_g_md = {'tree': ('newick', '(1:0.2,(2:0.3,3:0.4):0.5);')}
+ sample_g_md = {'graph': ('edge_list', '(4,5), (4,6), (5,7), (6,7)')}
+ t = Table(d, obs_ids, samp_ids, observation_group_metadata=obs_g_md,
+ sample_group_metadata=sample_g_md)
+ new_sample_md = {
+ 'tree': ('newick', '((4:0.1,5:0.1):0.2,(6:0.1,7:0.1):0.2):0.3;')
+ }
+ t.add_group_metadata(new_sample_md)
+ self.assertEqual(
+ t.group_metadata(),
+ {'graph': ('edge_list', '(4,5), (4,6), (5,7), (6,7)'),
+ 'tree': ('newick', '((4:0.1,5:0.1):0.2,(6:0.1,7:0.1):0.2):0.3;')})
+
+ def test_add_metadata_two_entries(self):
+ """ add_metadata functions with more than one md entry """
+ obs_ids = [1, 2, 3]
+ obs_md = {1: {'taxonomy': ['A', 'B'], 'other': 'h1'},
+ 2: {'taxonomy': ['B', 'C'], 'other': 'h2'},
+ 3: {'taxonomy': ['E', 'D', 'F'], 'other': 'h3'}}
+ samp_ids = [4, 5, 6, 7]
+ samp_md = [{'d': 0}, {'e': 0}, {'f': 0}, {'g': 0}]
+ d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+ t = Table(d, obs_ids, samp_ids, observation_metadata=None,
+ sample_metadata=samp_md)
+ t.add_metadata(obs_md, axis='observation')
+ self.assertEqual(t._observation_metadata[0]['taxonomy'], ['A', 'B'])
+ self.assertEqual(t._observation_metadata[1]['taxonomy'], ['B', 'C'])
+ self.assertEqual(t._observation_metadata[2]['taxonomy'],
+ ['E', 'D', 'F'])
+ self.assertEqual(t._observation_metadata[0]['other'], 'h1')
+ self.assertEqual(t._observation_metadata[1]['other'], 'h2')
+ self.assertEqual(t._observation_metadata[2]['other'], 'h3')
+
+ samp_md = {4: {'x': 'y', 'foo': 'bar'}, 5: {'x': 'z'}}
+ t.add_metadata(samp_md, axis='sample')
+ self.assertEqual(t._sample_metadata[0]['x'], 'y')
+ self.assertEqual(t._sample_metadata[0]['foo'], 'bar')
+ self.assertEqual(t._sample_metadata[1]['x'], 'z')
+
    def test_add_metadata_one_w_existing_metadata(self):
        """ add_sample_metadata functions with existing metadata """
        obs_ids = [1, 2, 3]
        obs_md = [{'a': 0}, {'b': 0}, {'c': 0}]
        samp_ids = [4, 5, 6, 7]
        samp_md = [{'Treatment': 'Control'},
                   {'Treatment': 'Fasting'},
                   {'Treatment': 'Fasting'},
                   {'Treatment': 'Control'}]
        d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
        t = Table(d, obs_ids, samp_ids, observation_metadata=obs_md,
                  sample_metadata=samp_md)
        self.assertEqual(t._sample_metadata[0]['Treatment'], 'Control')
        self.assertEqual(t._sample_metadata[1]['Treatment'], 'Fasting')
        self.assertEqual(t._sample_metadata[2]['Treatment'], 'Fasting')
        self.assertEqual(t._sample_metadata[3]['Treatment'], 'Control')

        # New metadata is keyed by sample id (not position); id 10 is not
        # in the table and is expected to be ignored.
        samp_md = {4: {'barcode': 'TTTT'},
                   6: {'barcode': 'AAAA'},
                   5: {'barcode': 'GGGG'},
                   7: {'barcode': 'CCCC'},
                   10: {'ignore': 'me'}}
        t.add_metadata(samp_md, 'sample')
        # Pre-existing 'Treatment' values survive the merge...
        self.assertEqual(t._sample_metadata[0]['Treatment'], 'Control')
        self.assertEqual(t._sample_metadata[1]['Treatment'], 'Fasting')
        self.assertEqual(t._sample_metadata[2]['Treatment'], 'Fasting')
        self.assertEqual(t._sample_metadata[3]['Treatment'], 'Control')
        # ...and the new 'barcode' values land on the matching ids.
        self.assertEqual(t._sample_metadata[0]['barcode'], 'TTTT')
        self.assertEqual(t._sample_metadata[1]['barcode'], 'GGGG')
        self.assertEqual(t._sample_metadata[2]['barcode'], 'AAAA')
        self.assertEqual(t._sample_metadata[3]['barcode'], 'CCCC')

        # Observations without an entry in the new mapping fall back to None.
        obs_md = {1: {'foo': 'bar'}}
        t.add_metadata(obs_md, axis='observation')
        self.assertEqual(t._observation_metadata[0]['foo'], 'bar')
        self.assertEqual(t._observation_metadata[1]['foo'], None)
        self.assertEqual(t._observation_metadata[2]['foo'], None)
+
+ def test_add_metadata_one_entry(self):
+ """ add_sample_metadata functions with single md entry """
+ obs_ids = [1, 2, 3]
+ obs_md = [{'a': 0}, {'b': 0}, {'c': 0}]
+ samp_ids = [4, 5, 6, 7]
+ samp_md = {4: {'Treatment': 'Control'},
+ 5: {'Treatment': 'Fasting'},
+ 6: {'Treatment': 'Fasting'},
+ 7: {'Treatment': 'Control'}}
+ d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+ t = Table(d, obs_ids, samp_ids, obs_md=obs_md, samp_md=None)
+ t.add_metadata(samp_md, axis='sample')
+ self.assertEqual(t._sample_metadata[0]['Treatment'], 'Control')
+ self.assertEqual(t._sample_metadata[1]['Treatment'], 'Fasting')
+ self.assertEqual(t._sample_metadata[2]['Treatment'], 'Fasting')
+ self.assertEqual(t._sample_metadata[3]['Treatment'], 'Control')
+
+ def test_add_sample_metadata_two_entries(self):
+ """ add_sample_metadata functions with more than one md entry """
+ obs_ids = [1, 2, 3]
+ obs_md = [{'a': 0}, {'b': 0}, {'c': 0}]
+ samp_ids = [4, 5, 6, 7]
+ samp_md = {4: {'Treatment': 'Control', 'D': ['A', 'A']},
+ 5: {'Treatment': 'Fasting', 'D': ['A', 'B']},
+ 6: {'Treatment': 'Fasting', 'D': ['A', 'C']},
+ 7: {'Treatment': 'Control', 'D': ['A', 'D']}}
+ d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+ t = Table(d, obs_ids, samp_ids, obs_md=obs_md, samp_md=None)
+ t.add_metadata(samp_md, axis='sample')
+ self.assertEqual(t._sample_metadata[0]['Treatment'], 'Control')
+ self.assertEqual(t._sample_metadata[1]['Treatment'], 'Fasting')
+ self.assertEqual(t._sample_metadata[2]['Treatment'], 'Fasting')
+ self.assertEqual(t._sample_metadata[3]['Treatment'], 'Control')
+ self.assertEqual(t._sample_metadata[0]['D'], ['A', 'A'])
+ self.assertEqual(t._sample_metadata[1]['D'], ['A', 'B'])
+ self.assertEqual(t._sample_metadata[2]['D'], ['A', 'C'])
+ self.assertEqual(t._sample_metadata[3]['D'], ['A', 'D'])
+
+ def test_get_value_by_ids(self):
+ """Return the value located in the matrix by the ids"""
+ t1 = Table(np.array([[5, 6], [7, 8]]), [3, 4], [1, 2])
+ t2 = Table(np.array([[5, 6], [7, 8]]),
+ ['c', 'd'], ['a', 'b'])
+
+ self.assertEqual(5, t1.get_value_by_ids(3, 1))
+ self.assertEqual(6, t1.get_value_by_ids(3, 2))
+ self.assertEqual(7, t1.get_value_by_ids(4, 1))
+ self.assertEqual(8, t1.get_value_by_ids(4, 2))
+ self.assertEqual(5, t2.get_value_by_ids('c', 'a'))
+ self.assertEqual(6, t2.get_value_by_ids('c', 'b'))
+ self.assertEqual(7, t2.get_value_by_ids('d', 'a'))
+ self.assertEqual(8, t2.get_value_by_ids('d', 'b'))
+
+ self.assertRaises(UnknownIDError, t1.get_value_by_ids, 'a', 1)
+ self.assertRaises(UnknownIDError, t2.get_value_by_ids, 0, 0)
+
+ def test_getitem(self):
+ """getitem should work as expeceted"""
+ self.assertEqual(self.simple_derived[0, 0], 5)
+ self.assertEqual(self.simple_derived[1, 0], 7)
+ self.assertEqual(self.simple_derived[0, 1], 6)
+ self.assertEqual(self.simple_derived[1, 1], 8)
+ self.assertRaises(IndexError, self.simple_derived.__getitem__, [1, 2])
+
+ def test_is_empty(self):
+ """returns true if empty"""
+ self.assertTrue(Table(np.array([]), [], []).is_empty())
+ self.assertFalse(self.simple_derived.is_empty())
+ self.assertTrue(Table(np.array([[]]), [], []).is_empty())
+
+ def test_convert_vector_to_dense(self):
+ """Properly converts ScipySparseMat vectors to dense numpy repr."""
+ input_row = lil_matrix((1, 3))
+ input_row[(0, 0)] = 1
+ input_row[(0, 2)] = 3
+ exp = np.array([1, 0, 3])
+ obs = self.row_vec._to_dense(input_row)
+ npt.assert_array_equal(obs, exp)
+
+ input_row = lil_matrix((3, 1))
+ input_row[(0, 0)] = 1
+ input_row[(2, 0)] = 3
+ exp = np.array([1, 0, 3])
+ obs = self.row_vec._to_dense(input_row)
+ npt.assert_array_equal(obs, exp)
+
+ input_row = lil_matrix((1, 1))
+ input_row[(0, 0)] = 42
+ exp = np.array([42])
+ obs = self.single_ele._to_dense(input_row)
+ npt.assert_array_equal(obs, exp)
+
+ def test_length(self):
+ npt.assert_array_equal(self.null1.length(), 0)
+ npt.assert_array_equal(self.null2.length(axis='sample'), 42)
+ npt.assert_array_equal(self.null3.length(axis='observation'), 42)
+ npt.assert_array_equal(self.mat1.length(), 3)
+ npt.assert_array_equal(self.empty.length(axis='observation'), 2)
+ npt.assert_array_equal(self.row_vec.length(axis='observation'), 1)
+ npt.assert_array_equal(self.row_vec.length(axis='sample'), 3)
+
+ with self.assertRaises(UnknownAxisError):
+ self.mat1.length(axis='foo')
+
+ def test_shape(self):
+ """What kind of shape are you in?"""
+ npt.assert_array_equal(self.null1.shape, (0, 0))
+ npt.assert_array_equal(self.null2.shape, (0, 42))
+ npt.assert_array_equal(self.null3.shape, (42, 0))
+ npt.assert_array_equal(self.mat1.shape, (2, 3))
+ npt.assert_array_equal(self.empty.shape, (2, 2))
+ npt.assert_array_equal(self.row_vec.shape, (1, 3))
+ npt.assert_array_equal(self.col_vec.shape, (3, 1))
+ npt.assert_array_equal(self.single_ele.shape, (1, 1))
+
+ def test_dtype(self):
+ """What's your type?"""
+ for m in self.nulls:
+ self.assertEqual(m.dtype, None)
+
+ self.assertEqual(self.empty.dtype, float)
+ self.assertEqual(self.row_vec.dtype, float)
+
+ def test_nnz(self):
+ """What is your NNZ?"""
+ for m in self.nulls:
+ self.assertEqual(m.nnz, 0)
+
+ self.assertEqual(self.empty.nnz, 0)
+ self.assertEqual(self.single_ele.nnz, 1)
+ self.assertEqual(self.mat1.nnz, 4)
+ self.assertEqual(self.explicit_zeros.nnz, 4)
+
+ def test_get_row(self):
+ """Test grabbing a row from the matrix."""
+ # note that we only have to test the first two elements don't have that
+ # row according to the underlying scipy sparse matrix
+ for i in range(0, 2):
+ with self.assertRaises(IndexError):
+ self.nulls[i]._get_row(0)
+
+ exp = lil_matrix((1, 3))
+ exp[(0, 0)] = 1
+ exp[(0, 2)] = 2
+
+ obs = self.mat1._get_row(0)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_get_col(self):
+ """Test grabbing a column from the matrix."""
+ # note that we only have to test the first and last element, these
+ # don't have that column according to the underlying scipy sparse
+ # matrix
+ for i in [0, 2]:
+ with self.assertRaises(IndexError):
+ self.nulls[i]._get_col(0)
+
+ exp = lil_matrix((2, 1))
+ exp[(0, 0)] = 1
+ exp[(1, 0)] = 3
+
+ obs = self.mat1._get_col(0)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_eq(self):
+ """Test whether two matrices are equal."""
+ # Empty/null cases (i.e., 0x0, 0xn, nx0).
+ ids = lambda X: ['x%d' % e for e in range(0, X)]
+ a = Table(np.zeros((0, 0)), [], [])
+ b = Table(np.zeros((0, 42), dtype=float), [], ids(42))
+ c = Table(np.zeros((42, 0), dtype=float), ids(42), [])
+ d = Table(np.zeros((2, 2)), ids(2), ids(2))
+
+ self.assertTrue(self.null1 == a)
+ self.assertTrue(self.null2 == b)
+ self.assertTrue(self.null3 == c)
+ self.assertTrue(self.empty == d)
+
+ mat2 = Table(np.array([[1, 0, 2], [3, 0, 4]]),
+ ['o1', 'o2'], ['s1', 's2', 's3'])
+ self.assertTrue(self.mat1 == mat2)
+
+ mat2._data = mat2._data.tolil()
+ self.assertNotEqual(self.mat1._data.format, mat2._data.format)
+ self.assertEqual(self.mat1, mat2)
+
+ # Equality works in both directions.
+ self.assertEqual(mat2, self.mat1)
+
+ def test_ne(self):
+ """Test whether two matrices are not equal."""
+ # Wrong type.
+ self.assertTrue(self.null1 != np.array([]))
+
+ # Wrong shape.
+ ids = lambda X: ['x%d' % e for e in range(0, X)]
+ d = Table(np.ones((1, 1)), ids(1), ids(1))
+ self.assertTrue(self.null2 != self.null3)
+ self.assertTrue(self.empty != d)
+
+ # Wrong dtype.
+ d = Table(np.zeros((2, 2)), ids(2), ids(2), type=float)
+ self.assertTrue(self.empty != d)
+
+ # Wrong size.
+ wrong_size = Table(np.zeros((2, 2)), ids(2), ids(2))
+ self.assertTrue(self.empty == wrong_size)
+ wrong_size = Table(np.ones((1, 1)), ['c'], ['a'])
+ self.assertTrue(self.empty != wrong_size)
+
+ # Wrong size.
+ wrong_data = self.mat1.copy()
+ self.assertTrue(self.mat1 == wrong_data)
+ wrong_data = Table(np.array([[42, 0, 2], [3, 0, 4]]),
+ ['o1', 'o2'], ['s1', 's2', 's3'])
+ self.assertTrue(self.mat1 != wrong_data)
+ self.assertTrue(wrong_data != self.mat1)
+
    def test_getitem_2(self):
        """Test getting an element from the matrix."""
        # Any indexing into a null (0-sized) table raises.
        for m in self.nulls:
            with self.assertRaises(IndexError):
                m[0, 0]

        # Unsupported index forms: single integer, full 2-D slices, and
        # ranged slices all raise IndexError.
        with self.assertRaises(IndexError):
            self.empty[0]

        with self.assertRaises(IndexError):
            self.empty[:, :]

        with self.assertRaises(IndexError):
            self.empty[0:1, 0]

        with self.assertRaises(IndexError):
            self.empty[0, 0:1]

        # A full-column slice of an all-zero table is an empty sparse column.
        exp = lil_matrix((2, 1))
        obs = self.empty[:, 0]
        self.assertEqual((obs != exp).sum(), 0)

        # Extracting a column.
        obs = self.mat1[:, 2]
        self.assertEqual((obs != self.mat1._get_col(2)).sum(), 0)

        # Extracting a row.
        obs = self.mat1[1, :]
        self.assertEqual((obs != self.mat1._get_row(1)).sum(), 0)

        # Extracting a single element.
        self.assertEqual(self.empty[1, 1], 0)
        self.assertEqual(self.mat1[1, 2], 4)

        # Out-of-range column index raises.
        with self.assertRaises(IndexError):
            self.mat1[1, 3]
+
+
+class SparseTableTests(TestCase):
+
    def setUp(self):
        # Two identical 2x2 tables built from a {(row, col): value} dict.
        self.vals = {(0, 0): 5, (0, 1): 6, (1, 0): 7, (1, 1): 8}
        self.st1 = Table(self.vals,
                         ['1', '2'], ['a', 'b'])
        self.st2 = Table(self.vals,
                         ['1', '2'], ['a', 'b'])
        # Tables overlapping st1 on one observation/sample (st3) or none (st4).
        self.vals3 = {(0, 0): 1, (0, 1): 2, (1, 0): 3, (1, 1): 4}
        self.vals4 = {(0, 0): 1, (0, 1): 2, (1, 0): 3, (1, 1): 4}
        self.st3 = Table(self.vals3, ['2', '3'], ['b', 'c'])
        self.st4 = Table(self.vals4, ['3', '4'], ['c', 'd'])
        self._to_dict_f = lambda x: sorted(x.items())
        # st1 plus observation (taxonomy) and sample (barcode) metadata.
        self.st_rich = Table(self.vals,
                             ['1', '2'], ['a', 'b'],
                             [{'taxonomy': ['k__a', 'p__b']},
                              {'taxonomy': ['k__a', 'p__c']}],
                             [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])

        self.empty_st = Table([], [], [])

        # Tables with missing entries (st5), all-zero entries (st6), and a
        # mix of zero and nonzero entries (st7).
        self.vals5 = {(0, 1): 2, (1, 1): 4}
        self.st5 = Table(self.vals5, ['5', '6'], ['a', 'b'])

        self.vals6 = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 0}
        self.st6 = Table(self.vals6, ['5', '6'], ['a', 'b'])

        self.vals7 = {(0, 0): 5, (0, 1): 7, (1, 0): 8, (1, 1): 0}
        self.st7 = Table(self.vals7, ['5', '6'], ['a', 'b'])

        # Degenerate single-sample / single-observation tables.
        self.single_sample_st = Table(
            np.array([[2.0], [0.0], [1.0]]),
            ['O1', 'O2', 'O3'], ['S1'])
        self.single_obs_st = Table(np.array([[2.0, 0.0, 1.0]]),
                                   ['01'], ['S1', 'S2', 'S3'])

        # A 3x4 table with scattered zeros for sparsity-sensitive tests.
        self.sparse_table = Table(np.array([[1, 0, 2, 0],
                                            [0, 3, 4, 0],
                                            [0, 5, 0, 0]]),
                                  ['O1', 'O2', 'O3'],
                                  ['S1', 'S2', 'S3', 'S4'])
+
+ def test_sum(self):
+ """Test of sum!"""
+ self.assertEqual(self.st1.sum('whole'), 26)
+ npt.assert_equal(self.st1.sum('sample'), np.array([12, 14]))
+ npt.assert_equal(self.st1.sum('observation'), np.array([11, 15]))
+
+ exp = np.array([3.0])
+ obs = self.single_sample_st.sum('sample')
+ self.assertEqual(obs, exp)
+
+ exp = np.array([3.0])
+ obs = self.single_obs_st.sum('observation')
+ self.assertEqual(obs, exp)
+
+ def test_reduce(self):
+ """Reduce method"""
+ f = lambda x, y: x * 2 + y
+ npt.assert_equal(self.st1.reduce(f, 'sample'), np.array([17, 20]))
+ npt.assert_equal(self.st1.reduce(f, 'observation'), np.array([16, 22]))
+
    def test_transpose(self):
        """Should transpose a sparse table"""
        obs = self.st1.transpose()

        # Axes swap: observation ids become sample ids and vice versa.
        npt.assert_equal(obs.ids(), self.st1.ids(axis='observation'))
        npt.assert_equal(obs.ids(axis='observation'), self.st1.ids())
        npt.assert_equal(obs.data('1', 'sample'),
                         self.st1.data('1', 'observation'))
        npt.assert_equal(obs.data('2', 'sample'),
                         self.st1.data('2', 'observation'))
        # Transposing twice is the identity.
        self.assertEqual(obs.transpose(), self.st1)

        obs = self.st_rich.transpose()

        # Metadata must follow its axis through the transpose.
        npt.assert_equal(obs.ids(), self.st_rich.ids(axis='observation'))
        npt.assert_equal(obs.ids(axis='observation'), self.st_rich.ids())
        self.assertEqual(obs._sample_metadata,
                         self.st_rich._observation_metadata)
        self.assertEqual(obs._observation_metadata,
                         self.st_rich._sample_metadata)
        npt.assert_equal(obs.data('1', 'sample'),
                         self.st_rich.data('1', 'observation'))
        npt.assert_equal(obs.data('2', 'sample'),
                         self.st_rich.data('2', 'observation'))
        self.assertEqual(obs.transpose(), self.st_rich)
+
    def test_update_ids(self):
        """ids are updated as expected"""
        # update observation ids
        exp = self.st1.copy()
        exp._observation_ids = np.array(['41', '42'])
        id_map = {'2': '42', '1': '41'}
        obs = self.st1.update_ids(id_map, axis='observation', inplace=False)
        self.assertEqual(obs, exp)

        # update sample ids
        exp = self.st1.copy()
        exp._sample_ids = np.array(['99', '100'])
        id_map = {'a': '99', 'b': '100'}
        obs = self.st1.update_ids(id_map, axis='sample', inplace=False)
        self.assertEqual(obs, exp)

        # extra ids in id_map are ignored
        exp = self.st1.copy()
        exp._observation_ids = np.array(['41', '42'])
        id_map = {'2': '42', '1': '41', '0': '40'}
        obs = self.st1.update_ids(id_map, axis='observation', inplace=False)
        self.assertEqual(obs, exp)

        # missing ids in id_map when strict=True
        with self.assertRaises(TableException):
            self.st1.update_ids({'b': '100'}, axis='sample', strict=True,
                                inplace=False)

        # missing ids in id_map when strict=False: unmapped ids ('a') keep
        # their original value.
        exp = self.st1.copy()
        exp._sample_ids = np.array(['a', '100'])
        id_map = {'b': '100'}
        obs = self.st1.update_ids(id_map, axis='sample', strict=False,
                                  inplace=False)
        self.assertEqual(obs, exp)

        # raise an error if update would result in duplicated ids
        with self.assertRaises(TableException):
            self.st1.update_ids({'a': '100', 'b': '100'}, axis='sample',
                                inplace=False)

        # raises an error if an invalid axis is passed
        with self.assertRaises(UnknownAxisError):
            self.st1.update_ids(id_map, axis='foo', inplace=False)

        # when inplace == False, the input object is unchanged
        exp = self.st1.copy()
        exp._observation_ids = np.array(['41', '42'])
        id_map = {'2': '42', '1': '41'}
        obs = self.st1.update_ids(id_map, axis='observation', inplace=False)
        npt.assert_equal(self.st1._observation_ids, np.array(['1', '2']))
        # when inplace == True, the input object is changed
        obs = self.st1.update_ids(id_map, axis='observation', inplace=True)
        npt.assert_equal(self.st1._observation_ids, np.array(['41', '42']))
+
+ def test_update_ids_nochange_bug(self):
+ """ids are updated as expected"""
+ # update observation ids
+ exp = self.st1.copy()
+ id_map = {'1': '1', '2': '2'}
+ obs = self.st1.update_ids(id_map, axis='observation', inplace=False)
+ self.assertEqual(obs, exp)
+
+ # test having one ID remain unchanged
+ exp = self.st1.copy()
+ exp._observation_ids = np.array(['1', '3'])
+ id_map = {'1': '1', '2': '3'}
+ obs = self.st1.update_ids(id_map, axis='observation', inplace=False)
+ self.assertEqual(obs, exp)
+
+ def test_update_ids_cache_bug(self):
+ obs = self.st1.update_ids({'1': 'x', '2': 'y'}, axis='observation',
+ inplace=False)
+ exp_index = {'x': 0, 'y': 1}
+ self.assertEqual(obs._obs_index, exp_index)
+
+ obs = self.st1.update_ids({'a': 'x', 'b': 'y'}, inplace=False)
+ exp_index = {'x': 0, 'y': 1}
+ self.assertEqual(obs._sample_index, exp_index)
+
def test_sort_order(self):
    """sort_order reorders either axis by an explicit id ordering."""
    # Observations in caller-specified (reversed) order.
    counts = {(0, 0): 7, (0, 1): 8, (1, 0): 5, (1, 1): 6}
    expected = Table(counts, ['2', '1'], ['a', 'b'])
    actual = self.st1.sort_order(['2', '1'], axis='observation')
    self.assertEqual(actual, expected)

    # Samples in caller-specified (reversed) order.
    counts = {(0, 0): 6, (0, 1): 5,
              (1, 0): 8, (1, 1): 7}
    expected = Table(counts, ['1', '2'], ['b', 'a'])
    actual = self.st1.sort_order(['b', 'a'], axis='sample')
    self.assertEqual(actual, expected)

    # An unrecognized axis is rejected.
    with self.assertRaises(UnknownAxisError):
        self.st1.sort_order(['b', 'a'], axis='foo')
+
def test_sort(self):
    """sort orders ids on the requested axis via a caller-supplied function."""
    # Samples sorted by id using the builtin ordering.
    source = nparray_to_sparse(
        np.array([[1, 2, 3, 8], [4, 5, 6, 9], [7, 8, 9, 11]]))
    tbl = Table(source, [2, 1, 3], ['c', 'a', 'b', 'd'])
    reordered = nparray_to_sparse(
        np.array([[2, 3, 1, 8], [5, 6, 4, 9], [8, 9, 7, 11]]))
    expected = Table(reordered, [2, 1, 3], ['a', 'b', 'c', 'd'])
    actual = tbl.sort(sort_f=sorted)
    self.assertEqual(actual, expected)

    # Observations sorted by id using the builtin ordering.
    source = nparray_to_sparse(
        np.array([[1, 2, 3, 8], [4, 5, 6, 9], [7, 8, 9, 11]]), float)
    tbl = Table(source, [2, 1, 3], ['c', 'a', 'b', 'd'])
    reordered = nparray_to_sparse(
        np.array([[4, 5, 6, 9], [1, 2, 3, 8], [7, 8, 9, 11]]), float)
    expected = Table(reordered, [1, 2, 3], ['c', 'a', 'b', 'd'])
    actual = tbl.sort(sort_f=sorted, axis='observation')
    self.assertEqual(actual, expected)

    # An unrecognized axis is rejected.
    with self.assertRaises(UnknownAxisError):
        tbl.sort(axis='foo')
+
def test_eq(self):
    """Tables compare equal iff their ids and data match."""
    self.assertTrue(self.st1 == self.st2)

    # Different observation ids -> unequal.
    self.st1._observation_ids = np.array(["1", "2", "3"], dtype=object)
    self.assertFalse(self.st1 == self.st2)

    # Same ids but different values -> unequal.
    self.st1._observation_ids = self.st2._observation_ids
    self.st1._data = nparray_to_sparse(np.array([[1, 2], [10, 20]]))
    self.assertFalse(self.st1 == self.st2)
+
def test_data_equality(self):
    """_data_equality compares only the underlying matrices."""
    self.assertTrue(self.st1._data_equality(self.st2._data))
    # A table's data is trivially equal to itself.
    self.assertTrue(self.st1._data_equality(self.st1._data))
    self.assertFalse(self.st1._data_equality(self.st3._data))
+
def test_nonzero(self):
    """nonzero yields an (observation id, sample id) pair per nonzero entry."""
    counts = {(0, 0): 5, (0, 1): 6, (0, 2): 0, (0, 3): 3,
              (1, 0): 0, (1, 1): 7, (1, 2): 0, (1, 3): 8,
              (2, 0): 1, (2, 1): -1, (2, 2): 0, (2, 3): 0}
    tbl = Table(counts, ['1', '2', '3'], ['a', 'b', 'c', 'd'])
    expected = [('1', 'a'), ('1', 'b'), ('1', 'd'), ('2', 'b'), ('2', 'd'),
                ('3', 'a'), ('3', 'b')]
    self.assertEqual(list(tbl.nonzero()), expected)
+
def test_nonzero_csc_bug(self):
    """Regression: nonzero must also work when the data is stored as CSC."""
    counts = {(0, 0): 5, (0, 1): 6, (0, 2): 0, (0, 3): 3,
              (1, 0): 0, (1, 1): 7, (1, 2): 0, (1, 3): 8,
              (2, 0): 1, (2, 1): -1, (2, 2): 0, (2, 3): 0}
    tbl = Table(counts, ['1', '2', '3'], ['a', 'b', 'c', 'd'])
    # Force the column-major representation.
    tbl._data = tbl._data.tocsc()
    expected = [('1', 'a'), ('1', 'b'), ('1', 'd'), ('2', 'b'), ('2', 'd'),
                ('3', 'a'), ('3', 'b')]
    self.assertEqual(list(tbl.nonzero()), expected)
+
def test_nonzero_counts(self):
    """nonzero_counts sums values per sample, per observation, or overall."""
    counts = {(0, 0): 5, (0, 1): 6, (0, 2): 0, (0, 3): 3,
              (1, 0): 0, (1, 1): 7, (1, 2): 0, (1, 3): 8,
              (2, 0): 1, (2, 1): -1, (2, 2): 0, (2, 3): 0}
    tbl = Table(counts, ['1', '2', '3'], ['a', 'b', 'c', 'd'])

    # Expected sums along each axis, and the grand total.
    npt.assert_equal(tbl.nonzero_counts('sample'),
                     np.array([6, 12, 0, 11]))
    npt.assert_equal(tbl.nonzero_counts('observation'),
                     np.array([14, 15, 0]))
    npt.assert_equal(tbl.nonzero_counts('whole'), np.array([29]))
+
def test_nonzero_counts_binary(self):
    """nonzero_counts with binary=True counts entries instead of summing."""
    counts = {(0, 0): 5, (0, 1): 6, (0, 2): 0, (0, 3): 3,
              (1, 0): 0, (1, 1): 7, (1, 2): 0, (1, 3): 8,
              (2, 0): 1, (2, 1): -1, (2, 2): 0, (2, 3): 0}
    tbl = Table(counts, ['1', '2', '3'], ['a', 'b', 'c', 'd'])

    # Expected number of nonzero entries along each axis, and overall.
    npt.assert_equal(tbl.nonzero_counts('sample', binary=True),
                     np.array([2, 3, 0, 2]))
    npt.assert_equal(tbl.nonzero_counts('observation', binary=True),
                     np.array([3, 2, 2]))
    npt.assert_equal(tbl.nonzero_counts('whole', binary=True),
                     np.array([7]))
+
def test_merge(self):
    """merge combines two tables under union/intersection id strategies."""
    union = 'union'
    intersect = 'intersection'

    # union x union with self doubles every value.
    expected = Table({(0, 0): 10, (0, 1): 12, (1, 0): 14, (1, 1): 16},
                     ['1', '2'], ['a', 'b'])
    actual = self.st1.merge(self.st1, sample=union, observation=union)
    self.assertEqual(actual, expected)

    # union x union with a partially overlapping table.
    expected = Table({(0, 0): 5, (0, 1): 6, (0, 2): 0, (1, 0): 7, (1, 1): 9,
                      (1, 2): 2, (2, 0): 0, (2, 1): 3, (2, 2): 4},
                     ['1', '2', '3'], ['a', 'b', 'c'])
    actual = self.st1.merge(self.st3, sample=union, observation=union)
    self.assertEqual(actual, expected)

    # union x union with a fully disjoint table: block-diagonal result.
    expected = Table({(0, 0): 5, (0, 1): 6, (0, 2): 0, (0, 3): 0,
                      (1, 0): 7, (1, 1): 8, (1, 2): 0, (1, 3): 0,
                      (2, 0): 0, (2, 1): 0, (2, 2): 1, (2, 3): 2,
                      (3, 0): 0, (3, 1): 0, (3, 2): 3, (3, 3): 4},
                     ['1', '2', '3', '4'], ['a', 'b', 'c', 'd'])
    actual = self.st1.merge(self.st4, sample=union, observation=union)
    self.assertEqual(actual, expected)

    # intersection x intersection with self.
    expected = Table({(0, 0): 10, (0, 1): 12, (1, 0): 14, (1, 1): 16},
                     ['1', '2'], ['a', 'b'])
    actual = self.st1.merge(self.st1, sample=intersect,
                            observation=intersect)
    self.assertEqual(actual, expected)

    # intersection x intersection with partial overlap.
    expected = Table({(0, 0): 9}, ['2'], ['b'])
    actual = self.st1.merge(self.st3, sample=intersect,
                            observation=intersect)
    self.assertEqual(actual, expected)

    # Intersecting with a disjoint table is an error.
    self.assertRaises(TableException, self.st1.merge, self.st4,
                      intersect, intersect)

    # sample intersection, observation union.
    expected = Table({(0, 0): 10, (0, 1): 12, (1, 0): 14, (1, 1): 16},
                     ['1', '2'], ['a', 'b'])
    actual = self.st1.merge(self.st1, sample=intersect, observation=union)
    self.assertEqual(actual, expected)

    expected = Table({(0, 0): 6, (1, 0): 9, (2, 0): 3},
                     ['1', '2', '3'], ['b'])
    actual = self.st1.merge(self.st3, sample=intersect, observation=union)
    self.assertEqual(actual, expected)

    self.assertRaises(TableException, self.st1.merge, self.st4,
                      intersect, union)

    # sample union, observation intersection.
    expected = Table({(0, 0): 10, (0, 1): 12, (1, 0): 14, (1, 1): 16},
                     ['1', '2'], ['a', 'b'])
    actual = self.st1.merge(self.st1, sample=union, observation=intersect)
    self.assertEqual(actual, expected)

    expected = Table({(0, 0): 7, (0, 1): 9, (0, 2): 2},
                     ['2'], ['a', 'b', 'c'])
    actual = self.st1.merge(self.st3, sample=union, observation=intersect)
    self.assertEqual(actual, expected)

    self.assertRaises(TableException, self.st1.merge, self.st4,
                      union, intersect)
+
def test_data(self):
    """data returns a single sample or observation vector by id."""
    # Vector of observations for sample 'a'.
    npt.assert_equal(self.st1.data('a', 'sample'), np.array([5, 7]))
    with self.assertRaises(UnknownIDError):
        self.st1.data('asdasd', 'sample')

    # Vector of samples for observation '1'.
    npt.assert_equal(self.st1.data('1', 'observation'), np.array([5, 6]))
    with self.assertRaises(UnknownIDError):
        self.st1.data('asdsad', 'observation')

    # An unrecognized axis is rejected.
    with self.assertRaises(UnknownAxisError):
        self.st1.data('a', axis='foo')
+
def test_data_sparse(self):
    """data with dense=False returns sparse column/row vectors."""
    # Sample vector comes back as a sparse column.
    expected = csc_matrix(np.array([[5], [7]]))
    actual = self.st1.data('a', 'sample', dense=False)
    self.assertEqual((actual != expected).nnz, 0)
    with self.assertRaises(UnknownIDError):
        self.st1.data('asdasd', 'sample')

    # Observation vector comes back as a sparse row.
    expected = csr_matrix(np.array([5, 6]))
    actual = self.st1.data('1', 'observation', dense=False)
    self.assertEqual((actual != expected).nnz, 0)
    with self.assertRaises(UnknownIDError):
        self.st1.data('asdsad', 'observation')
+
def test_delimited_self(self):
    """delimited_self renders a tab-separated view of the table."""
    expected = '\n'.join(
        ["# Constructed from biom file",
         "#OTU ID\ta\tb",
         "1\t5.0\t6.0",
         "2\t7.0\t8.0"])
    self.assertEqual(self.st1.delimited_self(), expected)

    # The observation column header is customizable.
    expected = '\n'.join(
        ["# Constructed from biom file",
         "Taxon\ta\tb",
         "1\t5.0\t6.0",
         "2\t7.0\t8.0"])
    self.assertEqual(
        self.st1.delimited_self(observation_column_name='Taxon'), expected)
+
def test_conv_to_self_type(self):
    """_conv_to_self_type coerces assorted inputs to the sparse format."""
    expected = lil_matrix((2, 2))
    for pos, value in [((0, 0), 5), ((0, 1), 6), ((1, 0), 7), ((1, 1), 8)]:
        expected[pos] = value
    actual = self.st1._conv_to_self_type(self.vals)
    self.assertEqual((actual != expected).sum(), 0)

    # transpose=True flips rows and columns.
    expected = lil_matrix((2, 2))
    for pos, value in [((0, 0), 5), ((0, 1), 7), ((1, 0), 6), ((1, 1), 8)]:
        expected[pos] = value
    actual = self.st1._conv_to_self_type(self.vals, transpose=True)
    self.assertEqual((actual != expected).sum(), 0)

    # A single vector becomes a 1-row matrix.
    expected = lil_matrix((1, 3))
    for pos, value in [((0, 0), 2), ((0, 1), 0), ((0, 2), 3)]:
        expected[pos] = value
    actual = self.st1._conv_to_self_type(np.array([2, 0, 3]))
    self.assertEqual((actual != expected).sum(), 0)

    # A list of {(row, col): value} dicts is merged into one matrix.
    expected = lil_matrix((2, 3))
    for pos, value in [((0, 0), 5), ((0, 1), 6), ((0, 2), 7),
                       ((1, 0), 8), ((1, 1), 9), ((1, 2), 10)]:
        expected[pos] = value
    actual = self.st1._conv_to_self_type([{(0, 0): 5, (0, 1): 6, (0, 2): 7},
                                          {(1, 0): 8, (1, 1): 9,
                                           (1, 2): 10}])
    self.assertEqual((actual != expected).sum(), 0)
+
def test_to_dense(self):
    """_to_dense flattens a sparse row or column vector to 1-D numpy."""
    row_vec = lil_matrix((1, 3))
    row_vec[(0, 0)] = 10
    npt.assert_equal(self.st1._to_dense(row_vec), np.array([10.0, 0, 0]))

    col_vec = lil_matrix((3, 1))
    col_vec[(0, 0)] = 12
    npt.assert_equal(self.st1._to_dense(col_vec), np.array([12.0, 0, 0]))

    # Degenerate 1x1 case.
    single = lil_matrix((1, 1))
    single[(0, 0)] = 42
    npt.assert_equal(self.st1._to_dense(single), np.array([42.0]))
+
def test_iter_data_dense(self):
    """iter_data defaults to dense per-sample vectors."""
    expected = [np.array([5, 7]), np.array([6, 8])]
    npt.assert_equal(list(self.st1.iter_data()), expected)
+
def test_iter_data_sparse(self):
    """iter_data(dense=False) yields sparse per-sample vectors."""
    expected = [csr_matrix(np.array([5, 7])),
                csr_matrix(np.array([6, 8]))]
    for actual, want in zip(self.st1.iter_data(dense=False), expected):
        # Sparse matrices compare elementwise; no differing entries allowed.
        self.assertTrue((actual != want).nnz == 0)
+
def test_iter_pairwise_simple(self):
    """iter_pairwise with tri=False, diag=True visits all ordered pairs."""
    sample_a = (np.array([5, 7]), 'a', None)
    sample_b = (np.array([6, 8]), 'b', None)
    expected = [(sample_a, sample_a), (sample_a, sample_b),
                (sample_b, sample_a), (sample_b, sample_b)]
    actual = list(self.st1.iter_pairwise(dense=True, tri=False, diag=True))
    npt.assert_equal(actual, expected)
+
def test_iter_pairwise_tri(self):
    """iter_pairwise with tri=True keeps the upper triangle plus diagonal."""
    sample_a = (np.array([5, 7]), 'a', None)
    sample_b = (np.array([6, 8]), 'b', None)
    expected = [(sample_a, sample_a), (sample_a, sample_b),
                (sample_b, sample_b)]
    actual = list(self.st1.iter_pairwise(dense=True, tri=True, diag=True))
    npt.assert_equal(actual, expected)
+
def test_iter_pairwise_tri_diag(self):
    """iter_pairwise with tri=True, diag=False yields each distinct pair once."""
    sample_a = (np.array([5, 7]), 'a', None)
    sample_b = (np.array([6, 8]), 'b', None)
    expected = [(sample_a, sample_b)]
    actual = list(self.st1.iter_pairwise(dense=True, tri=True, diag=False))
    npt.assert_equal(actual, expected)
+
def test_iter_pairwise_diag(self):
    """iter_pairwise with diag=False skips only the self-pairings."""
    sample_a = (np.array([5, 7]), 'a', None)
    sample_b = (np.array([6, 8]), 'b', None)
    expected = [(sample_a, sample_b), (sample_b, sample_a)]
    actual = list(self.st1.iter_pairwise(dense=True, tri=False, diag=False))
    npt.assert_equal(actual, expected)
+
def test_iter(self):
    """Iterating a table yields (values, sample id, metadata) triples."""
    expected = [(np.array([5, 7]), 'a', None),
                (np.array([6, 8]), 'b', None)]
    npt.assert_equal(list(self.st1), expected)
+
def test_iter_obs(self):
    """_iter_obs yields each observation as a sparse (CSR) row."""
    first = lil_matrix((1, 2))
    second = lil_matrix((1, 2))
    first[(0, 0)] = 5
    first[(0, 1)] = 6
    second[(0, 0)] = 7
    second[(0, 1)] = 8

    expected = [first.tocsr(), second.tocsr()]
    for actual, want in zip(self.st1._iter_obs(), expected):
        self.assertEqual((actual != want).sum(), 0)
+
def test_iter_samp(self):
    """_iter_samp yields each sample as a sparse (CSC) vector."""
    first = lil_matrix((1, 2))
    second = lil_matrix((1, 2))
    first[(0, 0)] = 5
    first[(0, 1)] = 7
    second[(0, 0)] = 6
    second[(0, 1)] = 8

    expected = [first.tocsc(), second.tocsc()]
    for actual, want in zip(self.st1._iter_samp(), expected):
        self.assertEqual((actual != want).sum(), 0)
+
def test_iter_samples(self):
    """iter yields per-sample vectors with ids and (optional) metadata."""
    expected = [(np.array([5, 7]), 'a', None),
                (np.array([6, 8]), 'b', None)]
    npt.assert_equal(list(self.st1.iter()), expected)

    expected = [(np.array([5, 7]), 'a', {'barcode': 'aatt'}),
                (np.array([6, 8]), 'b', {'barcode': 'ttgg'})]
    npt.assert_equal(list(self.st_rich.iter()), expected)

    # Regression: a column with an implicit zero must be yielded in full.
    tbl = Table({(0, 0): 5, (0, 1): 6, (1, 1): 8}, ['1', '2'], ['a', 'b'])
    expected = [(np.array([5, 0]), 'a', None),
                (np.array([6, 8]), 'b', None)]
    npt.assert_equal(list(tbl.iter(axis='sample')), expected)
+
def test_iter_observations(self):
    """iter(axis='observation') yields per-observation vectors."""
    expected = [(np.array([5, 6]), '1', None),
                (np.array([7, 8]), '2', None)]
    npt.assert_equal(list(self.st1.iter(axis='observation')), expected)

    expected = [(np.array([5, 6]), '1', {'taxonomy': ['k__a', 'p__b']}),
                (np.array([7, 8]), '2', {'taxonomy': ['k__a', 'p__c']})]
    npt.assert_equal(list(self.st_rich.iter(axis='observation')), expected)
+
def test_iter_sample_data(self):
    """iter_data walks the data sample by sample."""
    expected = [np.array([5, 7]), np.array([6, 8])]
    npt.assert_equal(list(self.st1.iter_data()), expected)

    # Metadata presence does not affect the yielded data.
    npt.assert_equal(list(self.st_rich.iter_data()), expected)

    # Regression: a column containing an implicit zero is yielded in full.
    tbl = Table({(0, 0): 5, (0, 1): 6, (1, 1): 8}, ['1', '2'], ['a', 'b'])
    expected = [np.array([5, 0]), np.array([6, 8])]
    npt.assert_equal(list(tbl.iter_data()), expected)
+
def test_iter_sample_data_single_obs(self):
    """With one observation, iter_data yields 1-element arrays, not scalars."""
    expected = [np.array([2.0]), np.array([0.0]), np.array([1.0])]
    actual = list(self.single_obs_st.iter_data())
    # Comparing each element with assertEqual confirms the observed value
    # is a single-element array rather than a numpy scalar.
    for got, want in zip(actual, expected):
        self.assertEqual(got, want)
+
def test_iter_observation_data(self):
    """iter_data(axis='observation') walks the data observation-wise."""
    expected = [np.array([5, 6]), np.array([7, 8])]
    npt.assert_equal(list(self.st1.iter_data(axis="observation")), expected)

    # Metadata presence does not affect the yielded data.
    npt.assert_equal(list(self.st_rich.iter_data(axis="observation")),
                     expected)
+
def test_iter_observation_data_single_sample(self):
    """With one sample, iter_data yields 1-element arrays, not scalars."""
    expected = [np.array([2.0]), np.array([0.0]), np.array([1.0])]
    actual = list(self.single_sample_st.iter_data(axis="observation"))
    for got, want in zip(actual, expected):
        self.assertEqual(got, want)
+
def test_copy_metadata(self):
    """copy deep-copies sample and observation metadata."""
    self.st_rich._sample_metadata[0]['foo'] = ['bar']
    duplicate = self.st_rich.copy()
    # Mutating the copy's metadata must not leak back into the original.
    duplicate._sample_metadata[0]['foo'].append('bar2')
    self.assertNotEqual(duplicate, self.st_rich)

    self.st_rich._observation_metadata[0]['foo'] = ['bar']
    duplicate = self.st_rich.copy()
    duplicate._observation_metadata[0]['foo'].append('bar2')
    self.assertNotEqual(duplicate, self.st_rich)
+
def test_copy_ids(self):
    """copy duplicates the id arrays rather than sharing them."""
    duplicate = self.st_rich.copy()
    self.st_rich._sample_ids[0] = 'a different id'
    self.assertNotEqual(duplicate, self.st_rich)

    duplicate = self.st_rich.copy()
    self.st_rich._observation_ids[0] = 'a different id'
    self.assertNotEqual(duplicate, self.st_rich)
+
def test_copy_data(self):
    """copy duplicates the underlying matrix rather than sharing it."""
    duplicate = self.st_rich.copy()
    self.st_rich._data *= 2
    self.assertNotEqual(duplicate, self.st_rich)
+
def test_filter_table_with_zeros(self):
    """filter handles rows/columns that contain zeros correctly."""
    table = self.sparse_table

    def full_length_sample(vals, id_, md):
        # Accepts everything; the yielded vector spans all observations.
        return vals.size == table.shape[0]

    def full_length_obs(vals, id_, md):
        # Accepts everything; the yielded vector spans all samples.
        return vals.size == table.shape[1]

    self.assertEqual(table.filter(full_length_sample, inplace=False), table)
    self.assertEqual(
        table.filter(full_length_obs, 'observation', inplace=False), table)

    def sparse_samples(vals, id_, md):
        return (np.all(vals == [1, 0, 0]) or
                np.all(vals == [0, 0, 0]))

    actual = table.filter(sparse_samples, inplace=False)
    expected = Table(np.array([[1, 0],
                               [0, 0],
                               [0, 0]]),
                     ['O1', 'O2', 'O3'],
                     ['S1', 'S4'])
    self.assertEqual(actual, expected)

    def sparse_obs(vals, id_, md):
        return (np.all(vals == [0, 3, 4, 0]) or
                np.all(vals == [0, 5, 0, 0]))

    actual = table.filter(sparse_obs, 'observation', inplace=False)
    # NOTE(review): inequality is the original expectation here — presumably
    # the retained observation ids differ from the ones listed below;
    # confirm against the self.sparse_table fixture.
    expected = Table(np.array([[0, 3, 4, 0],
                               [0, 5, 0, 0]]),
                     ['O1', 'O2'],
                     ['S1', 'S2', 'S3', 'S4'])
    self.assertNotEqual(actual, expected)
+
def test_filter_id_state(self):
    """filter rebuilds the id -> index caches on both axes."""
    def keep_b(vals, id_, md):
        return id_[0] == 'b'

    out_of_place = self.st3.filter(keep_b, inplace=False)
    in_place = self.st3.filter(keep_b, inplace=True)
    self.assertEqual(out_of_place._sample_index, {'b': 0})
    self.assertEqual(out_of_place._obs_index, {'2': 0, '3': 1})
    self.assertEqual(in_place._sample_index, {'b': 0})
    self.assertEqual(in_place._obs_index, {'2': 0, '3': 1})
+
def test_filter_return_type(self):
    """filter returns an equivalent table; inplace=True returns self."""
    def keep_b(vals, id_, md):
        return id_[0] == 'b'

    out_of_place = self.st3.filter(keep_b, inplace=False)
    in_place = self.st3.filter(keep_b, inplace=True)
    self.assertEqual(out_of_place, in_place)
    self.assertTrue(in_place is self.st3)
+
def test_filter_general_sample(self):
    """Samples can be filtered by id or by value vector."""
    expected = Table(csr_matrix(np.array([[5.],
                                          [7.]])),
                     ['1', '2'], ['a'],
                     [{'taxonomy': ['k__a', 'p__b']},
                      {'taxonomy': ['k__a', 'p__c']}],
                     [{'barcode': 'aatt'}])
    table = self.st_rich

    def keep_sample_a(vals, id_, md):
        return id_ == 'a'

    actual = table.filter(keep_sample_a, 'sample', inplace=False)
    self.assertEqual(actual, expected)

    def keep_by_values(vals, id_, md):
        return np.all(vals == np.array([5, 7]))

    actual = table.filter(keep_by_values, 'sample', inplace=False)
    self.assertEqual(actual, expected)
+
def test_filter_general_observation(self):
    """Observations can be filtered by metadata or by value vector."""
    expected = Table(csr_matrix(np.array([[7., 8.]])),
                     ['2'], ['a', 'b'],
                     [{'taxonomy': ['k__a', 'p__c']}],
                     [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])
    table = self.st_rich

    def keep_p_c(vals, id_, md):
        return md['taxonomy'][1] == 'p__c'

    actual = table.filter(keep_p_c, 'observation', inplace=False)
    self.assertEqual(actual, expected)

    def keep_by_values(vals, id_, md):
        return np.all(vals == np.array([7, 8]))

    actual = table.filter(keep_by_values, 'observation', inplace=False)
    self.assertEqual(actual, expected)
+
def test_filter_sample_id(self):
    """In-place sample filtering by id."""
    expected = Table(csr_matrix(np.array([[5.],
                                          [7.]])),
                     ['1', '2'], ['a'],
                     [{'taxonomy': ['k__a', 'p__b']},
                      {'taxonomy': ['k__a', 'p__c']}],
                     [{'barcode': 'aatt'}])
    table = self.st_rich
    table.filter(lambda vals, id_, md: id_ == 'a', 'sample')
    self.assertEqual(table, expected)
+
def test_filter_sample_metadata(self):
    """In-place sample filtering by metadata value."""
    expected = Table(csr_matrix(np.array([[6.],
                                          [8.]])),
                     ['1', '2'], ['b'],
                     [{'taxonomy': ['k__a', 'p__b']},
                      {'taxonomy': ['k__a', 'p__c']}],
                     [{'barcode': 'ttgg'}])
    table = self.st_rich
    table.filter(lambda vals, id_, md: md['barcode'] == 'ttgg', 'sample')
    self.assertEqual(table, expected)
+
def test_filter_sample_invert(self):
    """invert=True keeps the samples the predicate rejects."""
    expected = Table(csr_matrix(np.array([[6.],
                                          [8.]])),
                     ['1', '2'], ['b'],
                     [{'taxonomy': ['k__a', 'p__b']},
                      {'taxonomy': ['k__a', 'p__c']}],
                     [{'barcode': 'ttgg'}])
    table = self.st_rich
    # Selecting 'aatt' and inverting leaves only sample 'b'.
    table.filter(lambda vals, id_, md: md['barcode'] == 'aatt', 'sample',
                 invert=True)
    self.assertEqual(table, expected)
+
def test_filter_sample_remove_everything(self):
    """Filtering away every sample raises when empty tables are disallowed."""
    with errstate(empty='raise'), self.assertRaises(TableException):
        self.st_rich.filter(lambda vals, id_, md: False, 'sample')
+
def test_filter_observations_id(self):
    """In-place observation filtering by id."""
    expected = Table(csr_matrix(np.array([[5., 6.]])),
                     ['1'], ['a', 'b'],
                     [{'taxonomy': ['k__a', 'p__b']}],
                     [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])
    table = self.st_rich
    table.filter(lambda vals, id_, md: id_ == '1', 'observation')
    self.assertEqual(table, expected)
+
def test_filter_observations_metadata(self):
    """In-place observation filtering by metadata value."""
    expected = Table(csr_matrix(np.array([[7., 8.]])),
                     ['2'], ['a', 'b'],
                     [{'taxonomy': ['k__a', 'p__c']}],
                     [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])
    table = self.st_rich
    table.filter(lambda vals, id_, md: md['taxonomy'][1] == 'p__c',
                 'observation')
    self.assertEqual(table, expected)
+
def test_filter_observations_invert(self):
    """invert=True keeps the observations the predicate rejects."""
    expected = Table(csr_matrix(np.array([[5., 6.]])),
                     ['1'], ['a', 'b'],
                     [{'taxonomy': ['k__a', 'p__b']}],
                     [{'barcode': 'aatt'}, {'barcode': 'ttgg'}])
    table = self.st_rich
    # Selecting 'p__c' and inverting leaves only observation '1'.
    table.filter(lambda vals, id_, md: md['taxonomy'][1] == 'p__c',
                 'observation', invert=True)
    self.assertEqual(table, expected)
+
def test_filter_observations_remove_everything(self):
    """Filtering away every observation raises when empties are disallowed."""
    with errstate(empty='raise'), self.assertRaises(TableException):
        self.st_rich.filter(lambda vals, id_, md: False, 'observation')
+
def test_subsample_by_id(self):
    """subsample(by_id=True) randomly keeps 2 of 3 samples intact.

    Repeats the draw enough times that, with overwhelming probability,
    every one of the three possible sample pairs is seen.
    """
    table = Table(np.array([[3, 1, 2], [0, 3, 4]]), ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    actual_o1 = set()
    actual_o2 = set()
    for _ in range(100):
        obs = table.subsample(2, by_id=True)
        actual_o1.add(tuple(obs.data('O1', 'observation')))
        actual_o2.add(tuple(obs.data('O2', 'observation')))
    self.assertEqual(actual_o1, {(3, 1), (1, 2), (3, 2)})
    # Fixed: the original statement ended with a stray trailing comma,
    # which wrapped the assertEqual call in a pointless 1-tuple.
    self.assertEqual(actual_o2, {(0, 3), (3, 4), (0, 4)})
+
def test_filter_using_list_of_ids(self):
    """filter accepts a list of ids in place of a predicate."""
    actual = self.sparse_table.filter(['S1', 'S4'], inplace=False)
    expected = Table(np.array([[1, 0],
                               [0, 0],
                               [0, 0]]),
                     ['O1', 'O2', 'O3'],
                     ['S1', 'S4'])
    self.assertEqual(actual, expected)

    # invert=True drops the listed observation ids instead.
    actual = self.sparse_table.filter(['O1', 'O2'], 'observation',
                                      invert=True, inplace=False)
    expected = Table(np.array([[0, 5, 0, 0]]),
                     ['O3'],
                     ['S1', 'S2', 'S3', 'S4'])
    self.assertEqual(actual, expected)
+
def test_filter_out_full_table(self):
    """Filtering with an empty id list empties the chosen axis only."""
    tbl = Table(np.asarray([[1, 2, 3],
                            [4, 5, 6]]),
                ['a', 'b'], ['c', 'd', 'e'])
    no_samples = tbl.filter(ids_to_keep=[], axis='sample', inplace=False)
    no_obs = tbl.filter(ids_to_keep=[], axis='observation', inplace=False)

    self.assertEqual(no_samples.shape, (2, 0))
    self.assertEqual(no_obs.shape, (0, 3))
+
def test_subsample(self):
    """subsample rarefies vectors to the requested depth."""
    # One observation of depth 5: only sample S2 can survive.
    table = Table(np.array([[0, 5, 0]]), ['O1'], ['S1', 'S2', 'S3'])
    actual = table.subsample(5, axis='observation')
    npt.assert_equal(actual.data('O1', 'observation'), np.array([5]))
    self.assertEqual(actual.ids(), ['S2'])

    table = Table(np.array([[3, 1, 1], [0, 3, 3]]), ['O1', 'O2'],
                  ['S1', 'S2', 'S3'])
    seen_o1 = set()
    seen_o2 = set()
    # Repeat to cover the randomized outcomes.
    for _ in range(100):
        actual = table.subsample(3)
        seen_o1.add(tuple(actual.data('O1', 'observation')))
        seen_o2.add(tuple(actual.data('O2', 'observation')))
    self.assertEqual(seen_o1, {(3, 0, 0), (3, 1, 0), (3, 0, 1),
                               (3, 1, 1)})
    self.assertEqual(seen_o2, {(0, 3, 3), (0, 2, 3), (0, 3, 2),
                               (0, 2, 2)})
+
def test_subsample_md_copy_bug(self):
    """Regression: subsample used to raise when metadata were present."""
    table = Table(np.array([[5, 5, 5]]), ['O1'], ['S1', 'S2', 'S3'],
                  [{'foo': 'bar'}], [{1: 2}, {3: 4}, {5: 6}])
    expected = table.copy()
    # Depth equals every count, so the table must be unchanged.
    self.assertEqual(table.subsample(5), expected)
+
def test_pa(self):
    """pa converts counts to presence/absence in place."""
    expected = Table(np.array([[1, 1], [1, 0]]), ['5', '6'], ['a', 'b'])
    self.st7.pa()
    self.assertEqual(self.st7, expected)
+
def test_pa_with_neg(self):
    """pa marks any nonzero value, including negatives, as present."""
    tbl = Table(np.array([[-10, 7], [0, -0.1]]), ['5', '6'], ['a', 'b'])
    expected = Table(np.array([[1, 1], [0, 1]]), ['5', '6'], ['a', 'b'])
    tbl.pa()
    self.assertEqual(tbl, expected)
+
def test_pa_works_if_something_has_been_zeroed(self):
    """pa copes with explicitly stored zeros in the sparse matrix."""
    expected = Table(np.array([[0, 1], [1, 0]]), ['5', '6'], ['a', 'b'])
    # Overwrite a stored entry with zero before converting.
    self.st7._data[0, 0] = 0
    self.st7.pa()
    self.assertEqual(self.st7, expected)
+
def test_transform_return_type(self):
    """transform returns an equivalent table; inplace=True returns self."""
    def halve(data, id_, md):
        return data / 2.

    out_of_place = self.st3.transform(halve, inplace=False)
    in_place = self.st3.transform(halve, inplace=True)
    self.assertEqual(out_of_place, in_place)
    self.assertTrue(in_place is self.st3)
+
def test_transform_observation(self):
    """transform applies a per-observation function in place."""
    def binarize_at_7(v, id, md):
        return np.where(v >= 7, 1, 0)

    expected = Table({(0, 0): 0, (0, 1): 0, (1, 0): 1, (1, 1): 1},
                     ['1', '2'], ['a', 'b'])
    self.st1.transform(binarize_at_7, axis='observation')
    self.assertEqual(self.st1, expected)
+
def test_transform_sample(self):
    """transform applies a per-sample function; bad axes are rejected."""
    def binarize_at_6(v, id, md):
        return np.where(v >= 6, 1, 0)

    expected = Table({(0, 0): 0, (0, 1): 1, (1, 0): 1, (1, 1): 1},
                     ['1', '2'], ['a', 'b'])
    self.st1.transform(binarize_at_6)
    self.assertEqual(self.st1, expected)

    # An unrecognized axis is rejected.
    with self.assertRaises(UnknownAxisError):
        self.st1.transform(binarize_at_6, axis='foo')
+
def test_norm_observation_by_sample(self):
    """norm scales each sample (column) to sum to 1 by default."""
    tbl = Table({(0, 0): 2, (0, 1): 0, (1, 0): 6, (1, 1): 1},
                ['1', '2'], ['a', 'b'])
    expected = Table({(0, 0): 0.25, (0, 1): 0.0,
                      (1, 0): 0.75, (1, 1): 1.0},
                     ['1', '2'], ['a', 'b'])
    tbl.norm()
    self.assertEqual(tbl, expected)
+
def test_norm_sample_by_observation(self):
    """norm(axis='observation') scales each observation (row) to sum to 1."""
    tbl = Table({(0, 0): 0, (0, 1): 2, (1, 0): 2, (1, 1): 6},
                ['1', '2'], ['a', 'b'])
    expected = Table({(0, 0): 0.0, (0, 1): 1.0,
                      (1, 0): 0.25, (1, 1): 0.75},
                     ['1', '2'], ['a', 'b'])
    tbl.norm(axis='observation')
    self.assertEqual(tbl, expected)
+
def test_collapse_observations_by_metadata_one_to_many_strict(self):
    """1-to-many collapse honors strict handling of short pathways."""
    rich = Table(np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
                 ['1', '2', '3'], ['a', 'b', 'c'],
                 [{'pathways': [['a', 'bx'], ['a', 'd']]},
                  {'pathways': [['a', 'bx'], ['a', 'c']]},
                  {'pathways': [['a']]}],
                 [{'barcode': 'aatt'},
                  {'barcode': 'ttgg'},
                  {'barcode': 'aatt'}])
    expected = Table(np.array([[13, 15, 17], [8, 9, 10], [5, 6, 7]]),
                     ['bx', 'c', 'd'], ['a', 'b', 'c'],
                     [{'Path': ['a', 'bx']},
                      {'Path': ['a', 'c']},
                      {'Path': ['a', 'd']}],
                     [{'barcode': 'aatt'},
                      {'barcode': 'ttgg'},
                      {'barcode': 'aatt'}])

    def second_level(id_, md):
        # Indexing pathway[1] fails for the length-1 pathway ['a'].
        for pathway in md['pathways']:
            yield (pathway, pathway[1])

    actual = rich.collapse(
        second_level, norm=False, min_group_size=1, one_to_many=True,
        strict=False, axis='observation').sort(axis='observation')
    self.assertEqual(actual, expected)

    # strict=True propagates the IndexError from the short pathway.
    with self.assertRaises(IndexError):
        rich.collapse(
            second_level, norm=False, min_group_size=1, one_to_many=True,
            strict=True, axis='observation')
+
def test_collapse_observations_by_metadata_one_to_many(self):
    """1-to-many collapse sums observations into each pathway bin."""
    rich = Table(np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13],
                           [14, 15, 16]]),
                 ['1', '2', '3', '4'], ['a', 'b', 'c'],
                 [{'pathways': [['a', 'bx'], ['a', 'd']]},
                  {'pathways': [['a', 'bx'], ['a', 'c']]},
                  {'pathways': [['a', 'c']]},
                  {'pathways': [['a', 'c']]}],
                 [{'barcode': 'aatt'},
                  {'barcode': 'ttgg'},
                  {'barcode': 'aatt'}])
    expected = Table(np.array([[13, 15, 17], [33, 36, 39], [5, 6, 7]]),
                     ['bx', 'c', 'd'], ['a', 'b', 'c'],
                     [{'Path': ['a', 'bx']},
                      {'Path': ['a', 'c']},
                      {'Path': ['a', 'd']}],
                     [{'barcode': 'aatt'},
                      {'barcode': 'ttgg'},
                      {'barcode': 'aatt'}])

    def leaf_bins(id_, md):
        # Bin by the last element of each pathway.
        for pathway in md['pathways']:
            yield (pathway, pathway[-1])

    actual = rich.collapse(
        leaf_bins, norm=False, min_group_size=1,
        one_to_many=True, axis='observation').sort(axis='observation')
    self.assertEqual(actual, expected)

    rich = Table(np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
                 ['1', '2', '3'], ['a', 'b', 'c'],
                 [{'pathways': [['a', 'b'], ['a', 'd']]},
                  {'pathways': [['a', 'b'], ['a', 'c']]},
                  {'pathways': [['a', 'c']]}],
                 [{'barcode': 'aatt'},
                  {'barcode': 'ttgg'},
                  {'barcode': 'aatt'}])
    expected = Table(np.array([[37, 42, 47]]),
                     ['a'], ['a', 'b', 'c'],
                     [{'Path': ['a']}],
                     [{'barcode': 'aatt'},
                      {'barcode': 'ttgg'},
                      {'barcode': 'aatt'}])

    def root_bins(id_, md):
        # Bin everything under the first pathway element.
        for pathway in md['pathways']:
            yield (pathway[:1], pathway[0])

    actual = rich.collapse(
        root_bins, norm=False, min_group_size=1,
        one_to_many=True, axis='observation').sort(axis='observation')
    self.assertEqual(actual, expected)

    # include_collapsed_metadata=False drops the collapsed metadata.
    expected = Table(np.array([[37, 42, 47]]),
                     ['a'], ['a', 'b', 'c'], None,
                     [{'barcode': 'aatt'},
                      {'barcode': 'ttgg'},
                      {'barcode': 'aatt'}])
    actual = rich.collapse(
        root_bins, norm=False, min_group_size=1, one_to_many=True,
        include_collapsed_metadata=False,
        axis='observation').sort(axis='observation')
    self.assertEqual(actual, expected)

    # The result is constructed as a plain Table.
    actual = rich.collapse(
        root_bins, norm=False, min_group_size=1, one_to_many=True,
        include_collapsed_metadata=False,
        axis='observation').sort(axis='observation')
    self.assertEqual(actual, expected)
    self.assertEqual(type(actual), Table)
+
def test_collapse_observations_by_metadata_one_to_many_divide(self):
    """one_to_many_mode='divide' splits counts evenly across bins."""
    rich = Table(np.array([[1, 6, 7], [8, 0, 10], [11, 12, 13]]),
                 ['1', '2', '3'],
                 ['a', 'b', 'c'],
                 [{'pathways': [['a', 'bx'], ['a', 'd']]},
                  {'pathways': [['a', 'bx'], ['a', 'c']]},
                  {'pathways': [['a', 'c']]}],
                 [{'barcode': 'aatt'},
                  {'barcode': 'ttgg'},
                  {'barcode': 'aatt'}])
    expected = Table(np.array([[4.5, 3, 8.5], [15, 12, 18],
                               [0.5, 3, 3.5]]),
                     ['bx', 'c', 'd'],
                     ['a', 'b', 'c'],
                     [{'Path': ['a', 'bx']},
                      {'Path': ['a', 'c']},
                      {'Path': ['a', 'd']}],
                     [{'barcode': 'aatt'},
                      {'barcode': 'ttgg'},
                      {'barcode': 'aatt'}])

    def leaf_bins(id_, md):
        for pathway in md['pathways']:
            yield (pathway, pathway[-1])

    actual = rich.collapse(
        leaf_bins, norm=False, one_to_many=True,
        one_to_many_mode='divide',
        axis='observation').sort(axis='observation')
    self.assertEqual(actual, expected)

    # strict=False skips pathways that are too short to index.
    rich = Table(
        np.array([[5.0, 6.0, 7], [8, 9, 10], [11, 12, 13.0]]),
        ['1', '2', '3'], ['a', 'b', 'c'],
        [{'pathways': [['a', 'bx'], ['a', 'd']]},
         {'pathways': [['a', 'bx'], ['a', 'c'], ['z']]},
         {'pathways': [['a']]}],
        [{'barcode': 'aatt'},
         {'barcode': 'ttgg'},
         {'barcode': 'aatt'}])
    expected = Table(np.array([[6.5, 7.5, 8.5], [4, 4.5, 5],
                               [2.5, 3, 3.5]]),
                     ['bx', 'c', 'd'], ['a', 'b', 'c'],
                     [{'Path': ['a', 'bx']},
                      {'Path': ['a', 'c']},
                      {'Path': ['a', 'd']}],
                     [{'barcode': 'aatt'},
                      {'barcode': 'ttgg'},
                      {'barcode': 'aatt'}])

    def second_level(id_, md):
        # Indexing pathway[1] fails for length-1 pathways.
        for pathway in md['pathways']:
            yield (pathway, pathway[1])

    actual = rich.collapse(
        second_level, norm=False, one_to_many=True,
        one_to_many_mode='divide',
        strict=False, axis='observation').sort(axis='observation')

    self.assertEqual(actual, expected)

    # strict=True propagates the IndexError.
    with self.assertRaises(IndexError):
        rich.collapse(
            second_level, norm=False, one_to_many=True,
            one_to_many_mode='divide',
            strict=True, axis='observation')

    # Unrecognized one_to_many_mode values are rejected.
    with self.assertRaises(ValueError):
        rich.collapse(
            second_level, norm=False, one_to_many=True,
            one_to_many_mode='foo',
            axis='observation')
+
def test_collapse_median(self):
    """collapse with a custom reducer (median) over two partitions."""
    table = Table(
        np.array([[5, 6, 7],
                  [1, 2, 3],
                  [8, 9, 10],
                  [1, 2.5, 1],
                  [11, 12, 13],
                  [2, 3, 10]]),
        ['a', 'b', 'c', 'd', 'e', 'f'],
        ['s1', 's2', 's3'])

    # Partition into (a, c, e) -> False and (b, d, f) -> True.
    def partition_f(id_, md):
        return id_ in {'b', 'd', 'f'}

    def collapse_f(t, axis):
        # Reduce each group to its per-sample median.
        return np.array([np.median(v) for v in t.iter_data(dense=True)])

    actual = table.collapse(partition_f, collapse_f, axis='observation',
                            norm=False)
    expected = Table(np.array([[8, 9, 10], [1, 2.5, 3]]),
                     [False, True],
                     ['s1', 's2', 's3'],
                     [{'collapsed_ids': ['a', 'c', 'e']},
                      {'collapsed_ids': ['b', 'd', 'f']}])
    self.assertEqual(actual, expected)
+
    def test_collapse_observations_by_metadata(self):
        """Collapse observations by arbitrary metadata"""
        # 3 observations x 3 samples; each observation carries a two-rank
        # 'taxonomy' list, each sample a 'barcode'.
        dt_rich = Table(
            np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
            ['1', '2', '3'], ['a', 'b', 'c'],
            [{'taxonomy': ['k__a', 'p__b']},
             {'taxonomy': ['k__a', 'p__c']},
             {'taxonomy': ['k__a', 'p__c']}],
            [{'barcode': 'aatt'},
             {'barcode': 'ttgg'},
             {'barcode': 'aatt'}])
        # Collapsing on the second taxonomy rank sums observations '2' and
        # '3' (both p__c) and leaves '1' (p__b) alone.
        exp_phy = Table(np.array([[5, 6, 7], [19, 21, 23]]),
                        ['p__b', 'p__c'], ['a', 'b', 'c'],
                        [{'collapsed_ids': ['1']},
                         {'collapsed_ids': ['2', '3']}],
                        [{'barcode': 'aatt'},
                         {'barcode': 'ttgg'},
                         {'barcode': 'aatt'}])
        bin_f = lambda id_, x: x['taxonomy'][1]
        obs_phy = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1,
            axis='observation').sort(axis='observation')
        self.assertEqual(obs_phy, exp_phy)

        # Collapsing on the first rank folds all observations into k__a.
        exp_king = Table(np.array([[24, 27, 30]]),
                         ['k__a'], ['a', 'b', 'c'],
                         [{'collapsed_ids': ['1', '2', '3']}],
                         [{'barcode': 'aatt'},
                          {'barcode': 'ttgg'},
                          {'barcode': 'aatt'}])
        bin_f = lambda id_, x: x['taxonomy'][0]
        obs_king = dt_rich.collapse(bin_f, norm=False, axis='observation')
        self.assertEqual(obs_king, exp_king)

        # A min_group_size larger than every partition raises.
        with errstate(all='raise'), self.assertRaises(TableException):
            dt_rich.collapse(bin_f, min_group_size=10, axis='observation')

        # Test out include_collapsed_metadata=False: collapsed observation
        # metadata is dropped (None).
        exp = Table(np.array([[24, 27, 30]]),
                    ['k__a'],
                    ['a', 'b', 'c'], None,
                    [{'barcode': 'aatt'},
                     {'barcode': 'ttgg'},
                     {'barcode': 'aatt'}])
        obs = dt_rich.collapse(bin_f, norm=False,
                               include_collapsed_metadata=False,
                               axis='observation')
        self.assertEqual(obs, exp)

        # Test out constructor.
        obs = dt_rich.collapse(bin_f, norm=False,
                               include_collapsed_metadata=False,
                               axis='observation')
        self.assertEqual(obs, exp)
        self.assertEqual(type(obs), Table)
+
    def test_collapse_samples_by_metadata(self):
        """Collapse samples by arbitrary metadata"""
        # Samples 'a' and 'c' share barcode 'aatt'; 'b' is 'ttgg' alone.
        dt_rich = Table(
            np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
            ['1', '2', '3'], ['a', 'b', 'c'],
            [{'taxonomy': ['k__a', 'p__b']},
             {'taxonomy': ['k__a', 'p__c']},
             {'taxonomy': ['k__a', 'p__c']}],
            [{'barcode': 'aatt'},
             {'barcode': 'ttgg'},
             {'barcode': 'aatt'}])
        # Collapsing on barcode sums columns 'a' + 'c' into 'aatt'.
        exp_bc = Table(
            np.array([[12, 6], [18, 9], [24, 12]]),
            ['1', '2', '3'], ['aatt', 'ttgg'],
            [{'taxonomy': ['k__a', 'p__b']},
             {'taxonomy': ['k__a', 'p__c']},
             {'taxonomy': ['k__a', 'p__c']}],
            [{'collapsed_ids': ['a', 'c']},
             {'collapsed_ids': ['b']}])
        bin_f = lambda id_, x: x['barcode']
        obs_bc = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1,
            axis='sample').sort(axis='sample')
        self.assertEqual(obs_bc, exp_bc)

        # A min_group_size larger than every partition raises.
        with errstate(all='raise'), self.assertRaises(TableException):
            dt_rich.collapse(bin_f, min_group_size=10)

        # Test out include_collapsed_metadata=False: collapsed sample
        # metadata is dropped (None).
        exp = Table(np.array([[12, 6], [18, 9], [24, 12]]),
                    ['1', '2', '3'],
                    ['aatt', 'ttgg'],
                    [{'taxonomy': ['k__a', 'p__b']},
                     {'taxonomy': ['k__a', 'p__c']},
                     {'taxonomy': ['k__a', 'p__c']}],
                    None)

        obs = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1,
            include_collapsed_metadata=False).sort(axis='sample')
        self.assertEqual(obs, exp)

        # Test out constructor.
        obs = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1,
            include_collapsed_metadata=False).sort(axis='sample')
        self.assertEqual(obs, exp)
        self.assertEqual(type(obs), Table)
+
    def test_collapse_samples_by_metadata_one_to_many_strict(self):
        """Collapse samples by arbitrary metadata"""
        # Sample 'XXc' carries only a one-element path ['a'], so foo[1] is
        # out of range for it: strict=False skips it, strict=True raises.
        dt_rich = Table(np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
                        ['1', '2', '3'],
                        ['XXa', 'XXb', 'XXc'],
                        [{'other': 'aatt'},
                         {'other': 'ttgg'},
                         {'other': 'aatt'}],
                        [{'foo': [['a', 'b'], ['a', 'd']]},
                         {'foo': [['a', 'b'], ['a', 'c']]},
                         {'foo': [['a']]}])
        exp_cat2 = Table(np.array([[11, 17, 23], [6, 9, 12], [5, 8, 11]]).T,
                         ['1', '2', '3'],
                         ['b', 'c', 'd'],
                         [{'other': 'aatt'},
                          {'other': 'ttgg'},
                          {'other': 'aatt'}],
                         [{'Path': ['a', 'b']},
                          {'Path': ['a', 'c']},
                          {'Path': ['a', 'd']}])

        def bin_f(id_, x):
            # One (path, category) pair per listed path; foo[1] raises
            # IndexError for the one-element path.
            for foo in x['foo']:
                yield (foo, foo[1])

        obs_cat2 = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1, one_to_many=True,
            strict=False).sort(axis='observation')
        self.assertEqual(obs_cat2, exp_cat2)

        # strict=True surfaces the IndexError instead of skipping.
        self.assertRaises(IndexError, dt_rich.collapse, bin_f,
                          norm=False, min_group_size=1, one_to_many=True,
                          strict=True)
+
    def test_collapse_samples_by_metadata_one_to_many_divide(self):
        """Collapse samples by 1-M metadata using divide mode"""
        # In 'divide' mode each sample's counts are split evenly across the
        # pathways it maps to before summing per collapsed bin.
        dt_rich = Table(np.array([[1, 8, 11], [6, 0, 12], [7, 10, 13]]),
                        ['a', 'b', 'c'],
                        ['1', '2', '3'],
                        [{'barcode': 'aatt'},
                         {'barcode': 'ttgg'},
                         {'barcode': 'aatt'}],
                        [{'pathways': [['a', 'bx'], ['a', 'd']]},
                         {'pathways': [['a', 'bx'], ['a', 'c']]},
                         {'pathways': [['a', 'c']]}])
        exp = Table(np.array([[4.5, 15, 0.5], [3, 12, 3], [8.5, 18, 3.5]]),
                    ['a', 'b', 'c'],
                    ['bx', 'c', 'd'],
                    [{'barcode': 'aatt'},
                     {'barcode': 'ttgg'},
                     {'barcode': 'aatt'}],
                    [{'Path': ['a', 'bx']},
                     {'Path': ['a', 'c']},
                     {'Path': ['a', 'd']}])

        def bin_f(id_, x):
            # Bin on the last element of each pathway.
            for foo in x['pathways']:
                yield (foo, foo[-1])

        obs = dt_rich.collapse(
            bin_f, norm=False, one_to_many=True,
            one_to_many_mode='divide').sort(axis='sample')
        self.assertEqual(obs, exp)

        # Test skipping some sample metadata (strict=False).
        dt_rich = Table(np.array([[5.0, 8, 11], [6.0, 9, 12], [7, 10, 13.0]]),
                        ['a', 'b', 'c'],
                        ['1', '2', '3'],
                        [{'barcode': 'aatt'},
                         {'barcode': 'ttgg'},
                         {'barcode': 'aatt'}],
                        [{'pathways': [['a', 'bx'], ['a', 'd']]},
                         {'pathways': [['a', 'bx'], ['a', 'c'], ['z']]},
                         {'pathways': [['a']]}])
        exp = Table(np.array([[6.5, 4, 2.5], [7.5, 4.5, 3], [8.5, 5, 3.5]]),
                    ['a', 'b', 'c'],
                    ['bx', 'c', 'd'],
                    [{'barcode': 'aatt'},
                     {'barcode': 'ttgg'},
                     {'barcode': 'aatt'}],
                    [{'Path': ['a', 'bx']},
                     {'Path': ['a', 'c']},
                     {'Path': ['a', 'd']}])

        def bin_f(id_, x):
            # foo[1] raises IndexError for the one-element pathway ['a'].
            for foo in x['pathways']:
                yield (foo, foo[1])

        obs = dt_rich.collapse(
            bin_f, norm=False, one_to_many=True, one_to_many_mode='divide',
            strict=False).sort(axis='sample')

        self.assertEqual(obs, exp)

        # strict=True surfaces the IndexError instead of skipping.
        with self.assertRaises(IndexError):
            dt_rich.collapse(bin_f, norm=False,
                             one_to_many=True,
                             one_to_many_mode='divide',
                             strict=True)

        # Invalid one_to_many_mode.
        with self.assertRaises(ValueError):
            dt_rich.collapse(bin_f, norm=False,
                             one_to_many=True,
                             one_to_many_mode='foo')
+
    def test_collapse_samples_by_metadata_one_to_many(self):
        """Collapse samples by arbitrary metadata"""
        # Each sample maps to multiple (path, category) pairs, so its counts
        # contribute to every category it lists.
        dt_rich = Table(np.array([[5, 6, 7],
                                  [8, 9, 10],
                                  [11, 12, 13]]),
                        ['1', '2', '3'],
                        ['XXa', 'XXb', 'XXc'],
                        [{'other': 'aatt'},
                         {'other': 'ttgg'},
                         {'other': 'aatt'}],
                        [{'foo': [['a', 'b'], ['a', 'd']]},
                         {'foo': [['a', 'b'], ['a', 'c']]},
                         {'foo': [['a', 'c']]}])
        exp_cat2 = Table(
            np.array([[11, 17, 23], [13, 19, 25], [5, 8, 11]]).T,
            ['1', '2', '3'],
            ['b', 'c', 'd'],
            [{'other': 'aatt'},
             {'other': 'ttgg'},
             {'other': 'aatt'}],
            [{'Path': ['a', 'b']},
             {'Path': ['a', 'c']},
             {'Path': ['a', 'd']}])

        def bin_f(id_, x):
            # Bin on the last element of each path.
            for foo in x['foo']:
                yield (foo, foo[-1])

        obs_cat2 = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1,
            one_to_many=True, axis='sample').sort(axis='observation')

        self.assertEqual(obs_cat2, exp_cat2)

        # Collapsing on the first path element folds all samples into 'a'.
        dt_rich = Table(
            np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
            ['1', '2', '3'], ['a', 'b', 'c'],
            [{'other': 'aatt'},
             {'other': 'ttgg'},
             {'other': 'aatt'}],
            [{'foo': [['a', 'b'], ['a', 'd']]},
             {'foo': [['a', 'b'], ['a', 'c']]},
             {'foo': [['a', 'c']]}])
        exp_cat1 = Table(np.array([[29, 44, 59]]).T,
                         ['1', '2', '3'], ['a'],
                         [{'other': 'aatt'},
                          {'other': 'ttgg'},
                          {'other': 'aatt'}],
                         [{'Path': ['a']}])

        def bin_f(id_, x):
            for foo in x['foo']:
                yield (foo[:1], foo[0])

        obs_cat1 = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1,
            one_to_many=True, axis='sample').sort(axis='observation')
        self.assertEqual(obs_cat1, exp_cat1)

        # Test out include_collapsed_metadata=False: collapsed sample
        # metadata is dropped (None).
        exp = Table(np.array([[29, 44, 59]]).T,
                    ['1', '2', '3'],
                    ['a'],
                    [{'other': 'aatt'},
                     {'other': 'ttgg'},
                     {'other': 'aatt'}],
                    None)
        obs = dt_rich.collapse(
            bin_f, norm=False, min_group_size=1, one_to_many=True,
            include_collapsed_metadata=False,
            axis='sample').sort(axis='observation')
        self.assertEqual(obs, exp)

        # Test out constructor.
        obs = dt_rich.collapse(bin_f, norm=False, min_group_size=1,
                               one_to_many=True,
                               include_collapsed_metadata=False,
                               axis='sample').sort(axis='observation')
        self.assertEqual(obs, exp)
        self.assertEqual(type(obs), Table)
+
+ def test_to_json_empty(self):
+ t = Table({}, [], [])
+ serialized = t.to_json('foo')
+ reloaded = Table.from_json(loads(serialized))
+ self.assertEqual(t, reloaded)
+
+ def test_to_json_dense_int(self):
+ """Get a BIOM format string for a dense table of integers"""
+ # check by round trip
+ obs_ids = list(map(str, range(5)))
+ samp_ids = list(map(str, range(10)))
+ obs_md = [{'foo': i} for i in range(5)]
+ samp_md = [{'bar': i} for i in range(10)]
+ data = np.reshape(np.arange(50), (5, 10))
+
+ # using Table type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md)
+
+ # verify that we can parse still
+ t2 = parse_biom_table(StringIO(t.to_json('asd')))
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_dense_float(self):
+ """Get a BIOM format string for a dense table of floats"""
+ # check by round trip
+ obs_ids = ['a', 'b']
+ samp_ids = ['c', 'd']
+ obs_md = [{'foo': i} for i in range(2)]
+ samp_md = [{'bar': i} for i in range(2)]
+ data = np.array([[0.01, 1.5], [0.0, 0.79]])
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md)
+
+ # verify that we can parse still
+ t2 = parse_biom_table(StringIO(t.to_json('asd')))
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_dense_int_directio(self):
+ """Get a BIOM format string for a dense table of integers"""
+ # check by round trip
+ obs_ids = list(map(str, range(5)))
+ samp_ids = list(map(str, range(10)))
+ obs_md = [{'foo': i} for i in range(5)]
+ samp_md = [{'bar': i} for i in range(10)]
+ data = np.reshape(np.arange(50), (5, 10))
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md)
+
+ # verify that we can parse still
+ io = StringIO()
+ t.to_json('asd', direct_io=io)
+ io.seek(0)
+ t2 = parse_biom_table(io)
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_dense_float_directio(self):
+ """Get a BIOM format string for a dense table of floats"""
+ # check by round trip
+ obs_ids = ['a', 'b']
+ samp_ids = ['c', 'd']
+ obs_md = [{'foo': i} for i in range(2)]
+ samp_md = [{'bar': i} for i in range(2)]
+ data = np.array([[0.01, 1.5], [0.0, 0.79]])
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md)
+
+ # verify that we can parse still
+ io = StringIO()
+ t.to_json('asd', direct_io=io)
+ io.seek(0)
+ t2 = parse_biom_table(io)
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_sparse_int(self):
+ """Get a BIOM format string for a sparse table of integers"""
+ # check by round trip
+ obs_ids = list(map(str, range(5)))
+ samp_ids = list(map(str, range(10)))
+ obs_md = [{'foo': i} for i in range(5)]
+ samp_md = [{'bar': i} for i in range(10)]
+ data = [[0, 0, 10], [1, 1, 11], [2, 2, 12], [3, 3, 13], [4, 4, 14],
+ [3, 5, 15], [2, 6, 16], [1, 7, 18], [0, 8, 19], [1, 9, 20]]
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md, obs_md)
+
+ # verify that we can parse still
+ t2 = parse_biom_table(StringIO(t.to_json('asd')))
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_sparse_float(self):
+ """Get a BIOM format string for a sparse table of floats"""
+ # check by round trip
+ obs_ids = ['a', 'b']
+ samp_ids = ['c', 'd']
+ obs_md = [{'foo': i} for i in range(2)]
+ samp_md = [{'bar': i} for i in range(2)]
+ data = [[0, 0, 0.01], [0, 1, 1.5], [1, 0, 0.0], [1, 1, 0.79]]
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md, obs_md)
+
+ # verify that we can parse still
+ t2 = parse_biom_table(StringIO(t.to_json('asd')))
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_sparse_int_directio(self):
+ """Get a BIOM format string for a sparse table of integers"""
+ # check by round trip
+ obs_ids = list(map(str, range(5)))
+ samp_ids = list(map(str, range(10)))
+ obs_md = [{'foo': i} for i in range(5)]
+ samp_md = [{'bar': i} for i in range(10)]
+ data = [[0, 0, 10], [1, 1, 11], [2, 2, 12], [3, 3, 13], [4, 4, 14],
+ [3, 5, 15], [2, 6, 16], [1, 7, 18], [0, 8, 19], [1, 9, 20]]
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md, obs_md)
+
+ # verify that we can parse still
+ io = StringIO()
+ t.to_json('asd', direct_io=io)
+ io.seek(0)
+ t2 = parse_biom_table(io)
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
+ def test_to_json_sparse_float_directio(self):
+ """Get a BIOM format string for a sparse table of floats"""
+ # check by round trip
+ obs_ids = ['a', 'b']
+ samp_ids = ['c', 'd']
+ obs_md = [{'foo': i} for i in range(2)]
+ samp_md = [{'bar': i} for i in range(2)]
+ data = [[0, 0, 0.01], [0, 1, 1.5], [1, 0, 0.0], [1, 1, 0.79]]
+
+ # using OTUTable type to support parsing round trip
+ t = Table(data, obs_ids, samp_ids, obs_md, samp_md)
+
+ # verify that we can parse still
+ io = StringIO()
+ t.to_json('asd', direct_io=io)
+ io.seek(0)
+ t2 = parse_biom_table(io)
+
+ # verify that the tables are the same
+ self.assertEqual(t, t2)
+
    def test_extract_data_from_tsv(self):
        """Parses a classic table

        This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
        project, but we obtained permission from the authors of this method to
        port it to the BIOM Format project (and keep it under BIOM's BSD
        license).
        """
        # legacy_otu_table1 is the classic-format fixture defined at the
        # bottom of this module (it includes a blank line and wrapped rows).
        input = legacy_otu_table1.splitlines()
        samp_ids = ['Fing', 'Key', 'NA']
        obs_ids = ['0', '1', '7', '3', '4']
        metadata = [
            'Bacteria; Actinobacteria; Actinobacteridae; Propionibacterineae; '
            'Propionibacterium',
            'Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; Lactobacillal'
            'es; Lactobacillales; Streptococcaceae; Streptococcus',
            'Bacteria; Actinobacteria; Actinobacteridae; Gordoniaceae; Coryneb'
            'acteriaceae',
            'Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; Staphylococca'
            'ceae',
            'Bacteria; Cyanobacteria; Chloroplasts; vectors']
        md_name = 'Consensus Lineage'
        # Counts as [row, col, value] triples.
        data = [[0, 0, 19111], [0, 1, 44536], [0, 2, 42],
                [1, 0, 1216], [1, 1, 3500], [1, 2, 6],
                [2, 0, 1803], [2, 1, 1184], [2, 2, 2],
                [3, 0, 1722], [3, 1, 4903], [3, 2, 17],
                [4, 0, 589], [4, 1, 2074], [4, 2, 34]]

        exp = (samp_ids, obs_ids, data, metadata, md_name)
        obs = Table._extract_data_from_tsv(input, dtype=int)
        npt.assert_equal(obs, exp)
+
    def test_bin_samples_by_metadata(self):
        """Yield tables binned by sample metadata"""
        # Partition samples on 'age'; samples missing the key fall into the
        # np.inf bin.
        f = lambda id_, md: md.get('age', np.inf)
        obs_ids = ['a', 'b', 'c', 'd']
        samp_ids = ['1', '2', '3', '4']
        data = {(0, 0): 1, (0, 1): 2, (0, 2): 3, (0, 3): 4,
                (1, 0): 5, (1, 1): 6, (1, 2): 7, (1, 3): 8,
                (2, 0): 8, (2, 1): 9, (2, 2): 10, (2, 3): 11,
                (3, 0): 12, (3, 1): 13, (3, 2): 14, (3, 3): 15}
        obs_md = [{}, {}, {}, {}]
        samp_md = [{'age': 2, 'foo': 10}, {'age': 4}, {'age': 2, 'bar': 5}, {}]
        t = Table(data, obs_ids, samp_ids, obs_md, samp_md)
        obs_bins, obs_tables = unzip(t.partition(f))

        # Expected partitions: samples 1+3 (age 2), sample 2 (age 4),
        # sample 4 (no 'age' -> inf).
        exp_bins = (2, 4, np.inf)
        exp1_data = {(0, 0): 1, (0, 1): 3, (1, 0): 5, (1, 1): 7, (2, 0): 8,
                     (2, 1): 10, (3, 0): 12, (3, 1): 14}
        exp1_obs_ids = ['a', 'b', 'c', 'd']
        exp1_samp_ids = ['1', '3']
        exp1_obs_md = [{}, {}, {}, {}]
        exp1_samp_md = [{'age': 2, 'foo': 10}, {'age': 2, 'bar': 5}]
        exp1 = Table(exp1_data, exp1_obs_ids, exp1_samp_ids, exp1_obs_md,
                     exp1_samp_md)
        exp2_data = {(0, 0): 2, (1, 0): 6, (2, 0): 9, (3, 0): 13}
        exp2_obs_ids = ['a', 'b', 'c', 'd']
        exp2_samp_ids = ['2']
        exp2_obs_md = [{}, {}, {}, {}]
        exp2_samp_md = [{'age': 4}]
        exp2 = Table(exp2_data, exp2_obs_ids, exp2_samp_ids, exp2_obs_md,
                     exp2_samp_md)
        exp3_data = {(0, 0): 4, (1, 0): 8, (2, 0): 11, (3, 0): 15}
        exp3_obs_ids = ['a', 'b', 'c', 'd']
        exp3_samp_ids = ['4']
        exp3_obs_md = [{}, {}, {}, {}]
        exp3_samp_md = [{}]
        exp3 = Table(exp3_data, exp3_obs_ids, exp3_samp_ids, exp3_obs_md,
                     exp3_samp_md)
        exp_tables = (exp1, exp2, exp3)

        # Locate each expected bin by value rather than relying on the
        # order in which partitions are yielded.
        exp1_idx = obs_bins.index(exp_bins[0])
        exp2_idx = obs_bins.index(exp_bins[1])
        exp3_idx = obs_bins.index(exp_bins[2])
        obs_sort = (obs_bins[exp1_idx], obs_bins[exp2_idx], obs_bins[exp3_idx])
        self.assertEqual(obs_sort, exp_bins)
        obs_sort = (obs_tables[exp1_idx], obs_tables[exp2_idx],
                    obs_tables[exp3_idx])

        self.assertEqual(obs_sort, exp_tables)

        # We should get the same table type back.
        exp_types = (Table, Table, Table)
        obs_sort = (type(obs_tables[exp1_idx]), type(obs_tables[exp2_idx]),
                    type(obs_tables[exp3_idx]))
        self.assertEqual(obs_sort, exp_types)

        # Test passing a different constructor. We should get the same data
        # equality, but different table types.
        # NOTE(review): this repeats the identical partition call above, so
        # no "different constructor" path is actually exercised here.
        obs_bins, obs_tables = unzip(t.partition(f))

        obs_sort = (obs_bins[exp1_idx], obs_bins[exp2_idx], obs_bins[exp3_idx])
        self.assertEqual(obs_sort, exp_bins)
        obs_sort = (obs_tables[exp1_idx], obs_tables[exp2_idx],
                    obs_tables[exp3_idx])
        self.assertEqual(obs_sort, exp_tables)
        exp_types = (Table, Table, Table)
        obs_sort = (type(obs_tables[exp1_idx]), type(obs_tables[exp2_idx]),
                    type(obs_tables[exp3_idx]))
        self.assertEqual(obs_sort, exp_types)
+
+ def test_bin_observations_by_metadata(self):
+ """Yield tables binned by observation metadata"""
+ def make_level_f(level):
+ def f(id_, metadata):
+ return metadata['taxonomy'][:level]
+ return f
+
+ func_king = make_level_f(1)
+ func_phy = make_level_f(2)
+
+ obs_ids = ['a', 'b', 'c']
+ samp_ids = [1, 2, 3]
+ data = {(0, 0): 1, (0, 1): 2, (0, 2): 3,
+ (1, 0): 4, (1, 1): 5, (1, 2): 6,
+ (2, 0): 7, (2, 1): 8, (2, 2): 9}
+ obs_md = [{"taxonomy": ['k__a', 'p__b', 'c__c']},
+ {"taxonomy": ['k__a', 'p__b', 'c__d']},
+ {"taxonomy": ['k__a', 'p__c', 'c__e']}]
+ t = Table(data, obs_ids, samp_ids, observation_metadata=obs_md)
+
+ exp_king_obs_ids = ['a', 'b', 'c']
+ exp_king_samp_ids = [1, 2, 3]
+ exp_king_obs_md = [{"taxonomy": ['k__a', 'p__b', 'c__c']},
+ {"taxonomy": ['k__a', 'p__b', 'c__d']},
+ {"taxonomy": ['k__a', 'p__c', 'c__e']}]
+ exp_king = Table(data, exp_king_obs_ids, exp_king_samp_ids,
+ observation_metadata=exp_king_obs_md)
+ obs_bins, obs_king = unzip(t.partition(func_king, axis='observation'))
+
+ self.assertEqual(obs_king, [exp_king])
+ self.assertEqual(obs_bins, [tuple(['k__a'])])
+ self.assertEqual(type(obs_king[0]), type(exp_king))
+
+ obs_bins, obs_king = unzip(t.partition(func_king, axis='observation'))
+ self.assertEqual(obs_king, [exp_king])
+ self.assertEqual(obs_bins, [tuple(['k__a'])])
+ self.assertEqual(type(obs_king[0]), Table)
+
+ exp_phy1_obs_ids = ['a', 'b']
+ exp_phy1_samp_ids = [1, 2, 3]
+ exp_phy1_data = np.array([[1, 2, 3], [4, 5, 6]])
+ exp_phy1_data = {(0, 0): 1, (0, 1): 2, (0, 2): 3,
+ (1, 0): 4, (1, 1): 5, (1, 2): 6}
+ exp_phy1_obs_md = [{"taxonomy": ['k__a', 'p__b', 'c__c']},
+ {"taxonomy": ['k__a', 'p__b', 'c__d']}]
+ exp_phy1 = Table(exp_phy1_data, exp_phy1_obs_ids, exp_phy1_samp_ids,
+ observation_metadata=exp_phy1_obs_md)
+ exp_phy2_obs_ids = ['c']
+ exp_phy2_samp_ids = [1, 2, 3]
+ exp_phy2_data = {(0, 0): 7, (0, 1): 8, (0, 2): 9}
+ exp_phy2_obs_md = [{"taxonomy": ['k__a', 'p__c', 'c__e']}]
+ exp_phy2 = Table(exp_phy2_data, exp_phy2_obs_ids, exp_phy2_samp_ids,
+ observation_metadata=exp_phy2_obs_md)
+ obs_bins, obs_phy = unzip(t.partition(func_phy, axis='observation'))
+ self.assertIn(obs_phy[0], [exp_phy1, exp_phy2])
+ self.assertIn(obs_phy[1], [exp_phy1, exp_phy2])
+ self.assertIn(obs_bins[0], [('k__a', 'p__b'), ('k__a', 'p__c')])
+ self.assertIn(obs_bins[1], [('k__a', 'p__b'), ('k__a', 'p__c')])
+
+ def test_get_table_density(self):
+ """Test correctly computes density of table."""
+ # Perfectly dense tables.
+ npt.assert_almost_equal(self.st1.get_table_density(), 1.0)
+ npt.assert_almost_equal(self.st3.get_table_density(), 1.0)
+ npt.assert_almost_equal(self.st_rich.get_table_density(), 1.0)
+
+ # Empty table (no dimensions).
+ npt.assert_almost_equal(self.empty_st.get_table_density(), 0.0)
+
+ # Tables with some zeros.
+ npt.assert_almost_equal(self.st5.get_table_density(), 0.5)
+
+ # Tables with all zeros (with dimensions).
+ npt.assert_almost_equal(self.st6.get_table_density(), 0.0)
+
+ # Tables with some zeros explicitly defined.
+ npt.assert_almost_equal(self.st7.get_table_density(), 0.75)
+
+
+class SupportTests2(TestCase):
+
+ def test_coo_arrays_to_sparse(self):
+ """convert (values, (row, col)) to scipy"""
+ n_rows, n_cols = 3, 4
+ exp_d = lil_matrix((n_rows, n_cols))
+ exp_d[(0, 0)] = 10
+ exp_d[(1, 3)] = 5
+ exp_d[(2, 1)] = 2
+ exp_d = exp_d.tocoo()
+ exp = lil_matrix((n_rows, n_cols))
+ exp[(0, 0)] = 10
+ exp[(1, 3)] = 5
+ exp[(2, 1)] = 2
+ data = (np.array([5.0, 2.0, 10.0]),
+ (np.array([1, 2, 0]),
+ np.array([3, 1, 0])))
+ obs = coo_arrays_to_sparse(data, shape=(n_rows, n_cols))
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_list_list_to_sparse(self):
+ """convert [[row,col,value], ...] to scipy"""
+ input = [[0, 0, 1], [1, 1, 5.0], [0, 2, 6]]
+ exp = lil_matrix((2, 3))
+ exp[(0, 0)] = 1.0
+ exp[(1, 1)] = 5.0
+ exp[(0, 2)] = 6
+ obs = list_list_to_sparse(input)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_nparray_to_sparse(self):
+ """Convert nparray to sparse"""
+ input = np.array([[1, 2, 3, 4], [-1, 6, 7, 8], [9, 10, 11, 12]])
+ exp = lil_matrix((3, 4))
+ exp[(0, 0)] = 1
+ exp[(0, 1)] = 2
+ exp[(0, 2)] = 3
+ exp[(0, 3)] = 4
+ exp[(1, 0)] = -1
+ exp[(1, 1)] = 6
+ exp[(1, 2)] = 7
+ exp[(1, 3)] = 8
+ exp[(2, 0)] = 9
+ exp[(2, 1)] = 10
+ exp[(2, 2)] = 11
+ exp[(2, 3)] = 12
+ obs = nparray_to_sparse(input)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_list_dict_to_sparse(self):
+ """Take a list of dicts and condense down to a single dict"""
+ input = [{(0, 0): 10, (0, 1): 2}, {(1, 2): 15}, {(0, 3): 7}]
+ exp = lil_matrix((3, 4))
+ exp[(0, 0)] = 10
+ exp[(0, 1)] = 2
+ exp[(1, 2)] = 15
+ exp[(2, 3)] = 7
+ obs = list_dict_to_sparse(input)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_dict_to_sparse(self):
+ """Take a dict and convert to sparse"""
+ input = {(0, 1): 5, (1, 0): 2, (2, 1): 6}
+ exp = lil_matrix((3, 2))
+ exp[(0, 1)] = 5
+ exp[(1, 0)] = 2
+ exp[(2, 1)] = 6
+ obs = dict_to_sparse(input)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_to_sparse(self):
+ """Convert to expected sparse types"""
+ vals = {(0, 0): 5, (0, 1): 6, (1, 0): 7, (1, 1): 8}
+ obs = Table._to_sparse(vals)
+ exp = lil_matrix((2, 2))
+ exp[(0, 0)] = 5
+ exp[(0, 1)] = 6
+ exp[(1, 0)] = 7
+ exp[(1, 1)] = 8
+ self.assertEqual((obs != exp).sum(), 0)
+
+ input = {(0, 1): 5, (10, 8): -1.23}
+ input_transpose = {(1, 0): 5, (8, 10): -1.23}
+
+ exp = lil_matrix((11, 9))
+ exp[(0, 1)] = 5
+ exp[(10, 8)] = -1.23
+ obs = Table._to_sparse(input)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ # test transpose
+ exp = lil_matrix((9, 11))
+ exp[(1, 0)] = 5
+ exp[(8, 10)] = -1.23
+ obs = Table._to_sparse(input_transpose)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ # passing a list of dicts, transpose
+ exp = lil_matrix((3, 2))
+ exp[(0, 0)] = 5.0
+ exp[(1, 0)] = 6.0
+ exp[(2, 0)] = 7.0
+ exp[(0, 1)] = 8.0
+ exp[(1, 1)] = 9.0
+ exp[(2, 1)] = 10.0
+ obs = Table._to_sparse([{(0, 0): 5, (1, 0): 6, (2, 0): 7},
+ {(0, 1): 8, (1, 1): 9, (2, 1): 10}])
+ self.assertEqual((obs != exp).sum(), 0)
+
+ # passing a list of lil_matrix
+ exp = lil_matrix((2, 3))
+ exp[(0, 0)] = 5
+ exp[(0, 1)] = 6
+ exp[(0, 2)] = 7
+ exp[(1, 0)] = 8
+ exp[(1, 1)] = 9
+ exp[(1, 2)] = 10
+ row1 = lil_matrix((1, 3))
+ row1[(0, 0)] = 5
+ row1[(0, 1)] = 6
+ row1[(0, 2)] = 7
+ row2 = lil_matrix((1, 3))
+ row2[(0, 0)] = 8
+ row2[(0, 1)] = 9
+ row2[(0, 2)] = 10
+ obs = Table._to_sparse([row1, row2])
+ self.assertEqual((obs != exp).sum(), 0)
+
+ # test empty set
+ exp = lil_matrix((0, 0))
+ obs = Table._to_sparse([])
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_list_nparray_to_sparse(self):
+ """lists of nparrays to sparse"""
+ ins = [np.array([0, 2, 1, 0]), np.array([1, 0, 0, 1])]
+ exp = lil_matrix((2, 4))
+ exp[(0, 1)] = 2
+ exp[(0, 2)] = 1
+ exp[(1, 0)] = 1
+ exp[(1, 3)] = 1
+ obs = list_nparray_to_sparse(ins)
+ self.assertEqual((obs != exp).sum(), 0)
+
+ def test_list_sparse_to_sparse(self):
+ """list of lil_matrix to sparse"""
+ ins = [lil_matrix((1, 4)), lil_matrix((1, 4))]
+ ins[0][0, 0] = 5
+ ins[0][0, 1] = 10
+ ins[1][0, 2] = 1
+ ins[1][0, 3] = 2
+ exp = lil_matrix((2, 4))
+ exp[0, 0] = 5
+ exp[0, 1] = 10
+ exp[1, 2] = 1
+ exp[1, 3] = 2
+ obs = list_sparse_to_sparse(ins)
+ self.assertEqual((obs != exp).sum(), 0)
+
# Classic (TSV) OTU table fixture in the pre-1.0 layout: a leading comment
# line, a '#OTU id' header, and backslash-wrapped rows; it also contains a
# blank line and a trailing space after '42' to exercise lenient parsing.
legacy_otu_table1 = u"""# some comment goes here
#OTU id\tFing\tKey\tNA\tConsensus Lineage
0\t19111\t44536\t42 \tBacteria; Actinobacteria; Actinobacteridae; Propioniba\
cterineae; Propionibacterium

1\t1216\t3500\t6\tBacteria; Firmicutes; Alicyclobacillaceae; Bacilli; La\
ctobacillales; Lactobacillales; Streptococcaceae; Streptococcus
7\t1803\t1184\t2\tBacteria; Actinobacteria; Actinobacteridae; Gordoniace\
ae; Corynebacteriaceae
3\t1722\t4903\t17\tBacteria; Firmicutes; Alicyclobacillaceae; Bacilli; St\
aphylococcaceae
4\t589\t2074\t34\tBacteria; Cyanobacteria; Chloroplasts; vectors
"""
# The same table with comment lines interspersed among the data rows.
otu_table1 = u"""# Some comment
#OTU ID\tFing\tKey\tNA\tConsensus Lineage
0\t19111\t44536\t42\tBacteria; Actinobacteria; Actinobacteridae; \
Propionibacterineae; Propionibacterium
# some other comment
1\t1216\t3500\t6\tBacteria; Firmicutes; Alicyclobacillaceae; Bacilli; \
Lactobacillales; Lactobacillales; Streptococcaceae; Streptococcus
7\t1803\t1184\t2\tBacteria; Actinobacteria; Actinobacteridae; Gordoniaceae; \
Corynebacteriaceae
# comments
# everywhere!
3\t1722\t4903\t17\tBacteria; Firmicutes; Alicyclobacillaceae; \
Bacilli; Staphylococcaceae
4\t589\t2074\t34\tBacteria; Cyanobacteria; Chloroplasts; vectors
"""

# Parsers for observation metadata columns: 'sc_separated' splits a
# semicolon-separated string into a list of stripped fields; 'naive'
# passes the raw value through unchanged.
OBS_META_TYPES = {'sc_separated': lambda x: [e.strip() for e in x.split(';')],
                  'naive': lambda x: x
                  }
# Taxonomy columns are semicolon-separated lineages.
OBS_META_TYPES['taxonomy'] = OBS_META_TYPES['sc_separated']
+
# Run this module's tests when executed directly.
if __name__ == '__main__':
    main()
diff --git a/tests/test_util.py b/tests/test_util.py
new file mode 100644
index 0000000..782544d
--- /dev/null
+++ b/tests/test_util.py
@@ -0,0 +1,384 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------------
+# Copyright (c) 2011-2013, The BIOM Format Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# -----------------------------------------------------------------------------
+
+import gzip
+from os import remove
+from os.path import abspath, dirname, exists
+from tempfile import NamedTemporaryFile
+from unittest import TestCase, main
+
+import numpy as np
+import numpy.testing as npt
+
+from biom.table import Table
+from biom.parse import parse_biom_table
+from biom.util import (natsort, flatten, unzip, HAVE_H5PY,
+ get_biom_project_dir, parse_biom_config_files,
+ compute_counts_per_sample_stats, safe_md5, biom_open,
+ get_data_path, generate_subsamples, is_hdf5_file)
+
+
+__author__ = "Daniel McDonald"
+__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
+__credits__ = ["Rob Knight", "Peter Maxwell", "Sandra Smit",
+ "Zongzhi Liu", "Micah Hamady", "Daniel McDonald",
+ "Jai Ram Rideout", "Jorge Cañardo Alastuey"]
+__license__ = "BSD"
+__url__ = "http://biom-format.org"
+__maintainer__ = "Daniel McDonald"
+__email__ = "daniel.mcdonald at colorado.edu"
+
+
+if HAVE_H5PY:
+ import h5py
+
+
class UtilTests(TestCase):
    """Tests for the helper functions in ``biom.util``."""

    def setUp(self):
        # Parse the shared JSON fixture once per test; the
        # compute_counts_per_sample_stats tests rely on it.
        self.biom_otu_table1_w_tax = parse_biom_table(biom_otu_table1_w_tax)

    def test_generate_subsamples(self):
        """generate_subsamples should hit every achievable depth-3 profile."""
        table = Table(np.array([[3, 1, 1], [0, 3, 3]]), ['O1', 'O2'],
                      ['S1', 'S2', 'S3'])
        actual_o1 = set()
        actual_o2 = set()
        # zip() caps the subsample stream at 100 draws.  The draws appear to
        # be stochastic, so the test is probabilistic, but 100 draws makes
        # missing one of the few achievable profiles vanishingly unlikely.
        for _, obs in zip(range(100), generate_subsamples(table, 3)):
            actual_o1.add(tuple(obs.data('O1', 'observation')))
            actual_o2.add(tuple(obs.data('O2', 'observation')))

        self.assertEqual(actual_o1, {(3, 0, 0), (3, 1, 0), (3, 0, 1),
                                     (3, 1, 1)})
        self.assertEqual(actual_o2, {(0, 3, 3), (0, 2, 3), (0, 3, 2),
                                     (0, 2, 2)})

    def test_natsort(self):
        """natsort should perform numeric comparisons on strings

        This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
        project, but we obtained permission from the authors of this method to
        port it to the BIOM Format project (and keep it under BIOM's BSD
        license).
        """
        # strings with mixed alpha and numerics sort correctly
        s = 'sample1 sample2 sample11 sample12'.split()
        self.assertEqual(natsort(s),
                         'sample1 sample2 sample11 sample12'.split())
        s.reverse()
        self.assertEqual(natsort(s),
                         'sample1 sample2 sample11 sample12'.split())
        self.assertEqual(natsort(list('cba321')), list('123abc'))

        # strings with alpha only sort correctly
        self.assertEqual(natsort(list('cdba')), list('abcd'))

        # strings of ints sort correctly
        self.assertEqual(natsort(['11', '2', '1', '0']),
                         ['0', '1', '2', '11'])

        # strings of floats sort correctly
        self.assertEqual(natsort(['1.11', '1.12', '1.00', '0.009']),
                         ['0.009', '1.00', '1.11', '1.12'])

        # tuples sort naturally by their leading string element
        self.assertEqual(
            natsort([('11', 'A'), ('2', 'B'), ('1', 'C'), ('0', 'D')]),
            [('0', 'D'), ('1', 'C'), ('2', 'B'), ('11', 'A')])

    def test_unzip(self):
        """unzip(items) should be the inverse of zip(*items)

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        chars = [list('abcde'), list('ghijk')]
        numbers = [[1, 2, 3, 4, 5], [0, 0, 0, 0, 0]]
        strings = [["abcde", "fghij", "klmno"], ['xxxxx'] * 3]

        lists = [chars, numbers, strings]
        zipped = [zip(*i) for i in lists]
        unzipped = [unzip(i) for i in zipped]

        for observed, expected in zip(unzipped, lists):
            self.assertEqual(observed, expected)

    def test_flatten_no_change(self):
        """flatten should not change non-nested sequences (except to list)

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        self.assertEqual(flatten('abcdef'), list('abcdef'))  # test identities
        self.assertEqual(flatten([]), [])  # test empty sequence
        self.assertEqual(flatten(''), [])  # test empty string

    def test_flatten(self):
        """flatten should remove one level of nesting from nested sequences

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        self.assertEqual(flatten(['aa', 'bb', 'cc']), list('aabbcc'))
        self.assertEqual(flatten([1, [2, 3], [[4, [5]]]]), [1, 2, 3, [4, [5]]])

    def test_get_biom_project_dir(self):
        """Getting the biom project directory functions as expected.

        This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
        project, but we obtained permission from the authors of this method to
        port it to the BIOM Format project (and keep it under BIOM's BSD
        license).
        """
        # Do an explicit check on whether the file system containing
        # the current file is case insensitive. This is in response
        # to SF bug #2945548, where this test would fail on certain
        # unusual circumstances on case-insensitive file systems
        # because the case of abspath(__file__) was inconsistent.
        # (If you don't believe this, set case_insensitive_filesystem
        # to False, and rename your top-level biom-format directory as
        # Biom-format on OS X. That should cause this test to fail as
        # actual will be path/to/Biom-format and expected will be
        # path/to/biom-format.) Note that we don't need to change anything
        # in the get_biom_project_dir() function as if the
        # file system is case insensitive, the case of the returned
        # string is irrelevant.
        case_insensitive_filesystem = \
            exists(__file__.upper()) and exists(__file__.lower())

        actual = get_biom_project_dir()

        # I base the expected here off the imported location of
        # biom/util.py here, to handle cases where either the user
        # has biom-format in their PYTHONPATH, or when they've installed it
        # with setup.py.
        # If util.py moves this test will fail -- that
        # is what we want in this case, as the get_biom_project_dir()
        # function would need to be modified.
        import biom.util
        # abspath is idempotent, so the doubled call the original had
        # was redundant.
        util_py_filepath = abspath(biom.util.__file__)
        expected = dirname(dirname(dirname(util_py_filepath)))

        if case_insensitive_filesystem:
            # Make both lowercase if the file system is case insensitive.
            actual = actual.lower()
            expected = expected.lower()
        self.assertEqual(actual, expected)

    def test_parse_biom_config_files(self):
        """parse_biom_config_files functions as expected.

        This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
        project, but we obtained permission from the authors of this method to
        port it to the BIOM Format project (and keep it under BIOM's BSD
        license).
        """
        fake_file1 = ['key1\tval1', 'key2 val2']
        fake_file2 = ['key2\tval3']
        actual = parse_biom_config_files([fake_file1, fake_file2])
        # Later config files take precedence: key2 ends up as val3.
        expected = {'key1': 'val1', 'key2': 'val3'}
        self.assertEqual(actual, expected)

        # Looking up a nonexistent value returns None.
        self.assertEqual(actual['fake_key'], None)

        # Empty dict on empty input.
        self.assertEqual(parse_biom_config_files([]), {})

    def test_compute_counts_per_sample_stats_empty(self):
        """An empty table yields all-zero stats and an empty counts dict."""
        t = Table({}, [], [])
        res = compute_counts_per_sample_stats(t)
        self.assertEqual(res, (0, 0, 0, 0, {}))

    def test_compute_counts_per_sample_stats(self):
        """compute_counts_per_sample_stats functions as expected

        This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
        project, but we obtained permission from the authors of this method to
        port it to the BIOM Format project (and keep it under BIOM's BSD
        license).
        """
        actual = compute_counts_per_sample_stats(self.biom_otu_table1_w_tax)
        # For this fixture the leading values match (min, max, median, mean)
        # of the per-sample totals listed in the final dict.
        self.assertEqual(actual[0], 3)
        self.assertEqual(actual[1], 7)
        self.assertEqual(actual[2], 4)
        self.assertEqual(actual[3], 4.5)
        self.assertEqual(actual[4], {'Sample1': 7, 'Sample2': 3, 'Sample3': 4,
                                     'Sample4': 6, 'Sample5': 3, 'Sample6': 4})

    def test_compute_counts_per_sample_stats_obs_counts(self):
        """compute_counts_per_sample_stats handles binary (presence) counts

        This method is ported from QIIME (http://www.qiime.org). QIIME is a GPL
        project, but we obtained permission from the authors of this method to
        port it to the BIOM Format project (and keep it under BIOM's BSD
        license).
        """
        actual = compute_counts_per_sample_stats(self.biom_otu_table1_w_tax,
                                                 binary_counts=True)
        # With binary_counts=True the per-sample values equal the number of
        # nonzero observations per sample in the fixture, not summed counts.
        self.assertEqual(actual[0], 1)
        self.assertEqual(actual[1], 4)
        self.assertEqual(actual[2], 2.5)
        self.assertEqual(actual[3], 2.5)
        self.assertEqual(actual[4], {'Sample1': 2, 'Sample2': 3, 'Sample3': 4,
                                     'Sample4': 2, 'Sample5': 1, 'Sample6': 3})

    def test_safe_md5(self):
        """Make sure we have the expected md5 with varied input types

        This method is ported from PyCogent (http://www.pycogent.org). PyCogent
        is a GPL project, but we obtained permission from the authors of this
        method to port it to the BIOM Format project (and keep it under BIOM's
        BSD license).
        """
        # md5 of the bytes 'foo\n'.
        exp = 'd3b07384d113edec49eaa6238ad5ff00'

        tmp_f = NamedTemporaryFile(
            mode='w',
            prefix='test_safe_md5',
            suffix='txt')
        try:
            tmp_f.write('foo\n')
            tmp_f.flush()

            # 'U' mode was deprecated since Python 3.4 and removed in 3.11;
            # default text mode already performs universal-newline decoding.
            with open(tmp_f.name) as fh:
                obs = safe_md5(fh)
            self.assertEqual(obs, exp)
        finally:
            # The original leaked this handle; NamedTemporaryFile removes
            # the file on close.
            tmp_f.close()

        obs = safe_md5(['foo\n'])
        self.assertEqual(obs, exp)

        # unsupported type raises TypeError
        self.assertRaises(TypeError, safe_md5, 42)

    # NOTE(review): numpy.testing.dec was removed in numpy 1.18; these
    # decorators should migrate to unittest.skipIf when numpy is bumped.
    @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
    def test_biom_open_hdf5(self):
        """biom_open returns an h5py.File for HDF5-formatted tables."""
        with biom_open(get_data_path('test.biom')) as f:
            self.assertIsInstance(f, h5py.File)

        with biom_open(get_data_path('test_writing.biom'), 'w') as f:
            self.assertIsInstance(f, h5py.File)

        remove(get_data_path('test_writing.biom'))

    def test_biom_open_empty(self):
        """biom_open raises a descriptive ValueError on an empty file."""
        with self.assertRaises(ValueError) as e:
            with biom_open(get_data_path('no-contents.biom'), 'r'):
                pass
        self.assertIn("is empty and can't be parsed", str(e.exception))

    @npt.dec.skipif(HAVE_H5PY, msg='Can only be tested without H5PY')
    def test_biom_open_hdf5_no_h5py(self):
        """Opening an HDF5 table without h5py raises RuntimeError."""
        with self.assertRaises(RuntimeError):
            with biom_open(get_data_path('test.biom')):
                pass

    def test_biom_open_json(self):
        """biom_open yields a readable handle for plain JSON tables."""
        with biom_open(get_data_path('test.json')) as f:
            self.assertTrue(hasattr(f, 'read'))

    def test_biom_open_gz(self):
        """biom_open transparently handles gzipped tables in both modes."""
        with biom_open(get_data_path('test.json.gz')) as f:
            self.assertIsInstance(f, gzip.GzipFile)

        with biom_open(get_data_path('test_writing.json.gz'), 'w') as f:
            self.assertIsInstance(f, gzip.GzipFile)

        remove(get_data_path('test_writing.json.gz'))

    def test_is_hdf5_file(self):
        """is_hdf5_file distinguishes HDF5 from JSON payloads."""
        self.assertTrue(is_hdf5_file(get_data_path('test.biom')))
        self.assertFalse(is_hdf5_file(get_data_path('test.json')))
+
+
# BIOM 1.0.0 JSON fixture: a sparse 5 OTU x 6 sample table with
# greengenes-style taxonomy strings on the rows and mapping-file metadata
# (barcodes, primers, body site) on the columns.  Parsed by setUp() for the
# counts-per-sample statistics tests.  Trailing backslashes inside the
# triple-quoted string are line continuations and do not appear in the data.
biom_otu_table1_w_tax = """{
 "id":null,
 "format": "Biological Observation Matrix 1.0.0-dev",
 "format_url": "http://biom-format.org",
 "type": "OTU table",
 "generated_by": "QIIME revision XYZ",
 "date": "2011-12-19T19:00:00",
 "rows":[
 {"id":"GG_OTU_1", "metadata":{"taxonomy":["k__Bacteria", "p__Proteoba\
cteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriac\
eae", "g__Escherichia", "s__"]}},
 {"id":"GG_OTU_2", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobact\
eria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichosp\
ermum", "s__"]}},
 {"id":"GG_OTU_3", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchae\
ota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "\
g__Methanosarcina", "s__"]}},
 {"id":"GG_OTU_4", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicute\
s", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaer\
obium", "s__Halanaerobiumsaccharolyticum"]}},
 {"id":"GG_OTU_5", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobac\
teria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriace\
ae", "g__Escherichia", "s__"]}}
 ],
 "columns":[
 {"id":"Sample1", "metadata":{
 "BarcodeSequence":"CGCTTATCGAGA",
 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
 "BODY_SITE":"gut",
 "Description":"human gut"}},
 {"id":"Sample2", "metadata":{
 "BarcodeSequence":"CATACCAGTAGC",
 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
 "BODY_SITE":"gut",
 "Description":"human gut"}},
 {"id":"Sample3", "metadata":{
 "BarcodeSequence":"CTCTCTACCTGT",
 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
 "BODY_SITE":"gut",
 "Description":"human gut"}},
 {"id":"Sample4", "metadata":{
 "BarcodeSequence":"CTCTCGGCCTGT",
 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
 "BODY_SITE":"skin",
 "Description":"human skin"}},
 {"id":"Sample5", "metadata":{
 "BarcodeSequence":"CTCTCTACCAAT",
 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
 "BODY_SITE":"skin",
 "Description":"human skin"}},
 {"id":"Sample6", "metadata":{
 "BarcodeSequence":"CTAACTACCAAT",
 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
 "BODY_SITE":"skin",
 "Description":"human skin"}}
 ],
 "matrix_type": "sparse",
 "matrix_element_type": "int",
 "shape": [5, 6],
 "data":[[0,2,1],
 [1,0,5],
 [1,1,1],
 [1,3,2],
 [1,4,3],
 [1,5,1],
 [2,2,1],
 [2,3,4],
 [2,5,2],
 [3,0,2],
 [3,1,1],
 [3,2,1],
 [3,5,1],
 [4,1,1],
 [4,2,1]
 ]
 }
"""


if __name__ == '__main__':
    main()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-biom-format.git
More information about the debian-med-commit
mailing list