[med-svn] [Git][med-team/python-skbio][master] 5 commits: New upstream version 0.5.3
Andreas Tille
gitlab at salsa.debian.org
Wed Aug 15 14:55:30 BST 2018
Andreas Tille pushed to branch master at Debian Med / python-skbio
Commits:
6885c5b3 by Andreas Tille at 2018-08-15T13:20:38Z
New upstream version 0.5.3
- - - - -
0962c8f3 by Andreas Tille at 2018-08-15T13:21:02Z
Update upstream source from tag 'upstream/0.5.3'
Update to upstream version '0.5.3'
with Debian dir 85b957ae7db66009d6c6da7eb6622963e0f4a136
- - - - -
a2c9323d by Andreas Tille at 2018-08-15T13:21:02Z
New upstream version
- - - - -
c38b8710 by Andreas Tille at 2018-08-15T13:23:05Z
Standards-Version: 4.2.0
- - - - -
f2418d0e by Andreas Tille at 2018-08-15T13:54:34Z
TODO: Needs python-hdmedians (to be packaged)
- - - - -
28 changed files:
- .travis.yml
- CHANGELOG.md
- README.rst
- ci/conda_requirements.txt
- ci/pip_requirements.txt
- debian/changelog
- debian/control
- setup.py
- skbio/__init__.py
- skbio/alignment/_pairwise.py
- skbio/diversity/_driver.py
- skbio/diversity/_util.py
- skbio/diversity/tests/test_util.py
- skbio/sequence/_grammared_sequence.py
- skbio/stats/distance/__init__.py
- + skbio/stats/distance/_permdisp.py
- + skbio/stats/distance/tests/data/moving_pictures_dm.tsv
- + skbio/stats/distance/tests/data/moving_pictures_mf.tsv
- + skbio/stats/distance/tests/test_permdisp.py
- skbio/stats/ordination/__init__.py
- skbio/stats/ordination/_canonical_correspondence_analysis.py
- skbio/stats/ordination/_principal_coordinate_analysis.py
- + skbio/stats/ordination/tests/data/PCoA_biplot_descriptors
- + skbio/stats/ordination/tests/data/PCoA_biplot_projected_descriptors
- + skbio/stats/ordination/tests/data/PCoA_skbio
- skbio/stats/ordination/tests/test_principal_coordinate_analysis.py
- skbio/tree/_tree.py
- skbio/tree/tests/test_tree.py
Changes:
=====================================
.travis.yml
=====================================
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,22 +1,25 @@
# Check on http://lint.travis-ci.org/ after modifying it! Originally
# modified from https://gist.github.com/dan-blanchard/7045057
sudo: false
-language: python
+language: c
+os:
+ - linux
+ - osx
env:
- - PYTHON_VERSION=3.6 MAKE_DOC=TRUE
- - PYTHON_VERSION=3.5 MAKE_DOC=TRUE
- - PYTHON_VERSION=3.4 USE_CYTHON=TRUE
+ - CONDA_PY=3.6 MAKE_DOC=TRUE
+ - CONDA_PY=3.5 MAKE_DOC=TRUE USE_CYTHON=TRUE
+
before_install:
- - "export DISPLAY=:99.0"
- - "sh -e /etc/init.d/xvfb start"
- - wget http://repo.continuum.io/miniconda/Miniconda3-3.7.3-Linux-x86_64.sh -O miniconda.sh
- - chmod +x miniconda.sh
- - ./miniconda.sh -b
- - export PATH=/home/travis/miniconda3/bin:$PATH
+ - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
+ - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; fi
+ - bash miniconda.sh -b -p $HOME/miniconda
+ - export PATH="$HOME/miniconda/bin:$PATH"
+ - hash -r
# Update conda itself
- conda update --yes conda
+
install:
- - conda create --yes -n env_name python=$PYTHON_VERSION
+ - conda create --yes -n env_name python=$CONDA_PY
- conda install --yes -n env_name --file ci/conda_requirements.txt
- if [ ${USE_CYTHON} ]; then conda install --yes -n env_name cython; fi
- source activate env_name
@@ -24,11 +27,14 @@ install:
- python --version
- pip install -r ci/pip_requirements.txt
- pip install . --no-deps
+
script:
- WITH_COVERAGE=TRUE make test
- if [ ${MAKE_DOC} ]; then make -C doc clean html; fi
+
after_success:
- coveralls
+
notifications:
webhooks:
urls:
=====================================
CHANGELOG.md
=====================================
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
# scikit-bio changelog
+## Version 0.5.3 (2018-08-07)
+
+### Features
+
+* Added `unpack` and `unpack_by_func` to `skbio.tree.TreeNode` to unpack one or multiple internal nodes. The "unpack" operation removes an internal node and regrafts its children to its parent while retaining the overall length.
+
+* Added `support` to `skbio.tree.TreeNode` to return the support value of a node.
+
+* Added `permdisp` to `skbio.stats.distance` to test for the homogeneity of groups. ([#1228](https://github.com/biocore/scikit-bio/issues/1228)).
+
+* Added `pcoa_biplot` to `skbio.stats.ordination` to project descriptors into a PCoA plot.
+
+* Fixed pandas to 0.22.0 due to this: https://github.com/pandas-dev/pandas/issues/20527
+
+### Backward-incompatible changes [stable]
+
+### Backward-incompatible changes [experimental]
+
+### Performance enhancements
+
+### Bug fixes
+
+* Relaxing type checking in diversity calculations. ([#1583](https://github.com/biocore/scikit-bio/issues/1583)).
+
+### Deprecated functionality [stable]
+
+### Deprecated functionality [experimental]
+
+### Miscellaneous
+
+
## Version 0.5.2 (2018-04-18)
### Features
=====================================
README.rst
=====================================
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@
:target: http://scikit-bio.org
:alt: scikit-bio logo
-|Build Status| |Coverage Status| |ASV Benchmarks| |Gitter Badge| |Depsy Badge| |Anaconda Cloud Build| |Anaconda Cloud| |License| |Downloads| |Install|
+|Build Status| |Coverage Status| |ASV Benchmarks| |Gitter Badge| |Depsy Badge| |Anaconda Build Platforms| |Anaconda Build Version| |License| |Downloads| |Install|
scikit-bio is an open-source, BSD-licensed Python 3 package providing data structures, algorithms and educational resources for bioinformatics.
@@ -118,9 +118,9 @@ scikit-bio's logo was created by `Alina Prassas <http://cargocollective.com/alin
:target: https://gitter.im/biocore/scikit-bio?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
.. |Depsy Badge| image:: http://depsy.org/api/package/pypi/scikit-bio/badge.svg
:target: http://depsy.org/package/python/scikit-bio
-.. |Anaconda Cloud Build| image:: https://anaconda.org/conda-forge/scikit-bio/badges/build.svg
- :target: https://anaconda.org/biocore/scikit-bio/builds
-.. |Anaconda Cloud| image:: https://anaconda.org/conda-forge/scikit-bio/badges/version.svg
+.. |Anaconda Build Platforms| image:: https://anaconda.org/conda-forge/scikit-bio/badges/platforms.svg
+ :target: https://anaconda.org/conda-forge/scikit-bio
+.. |Anaconda Build Version| image:: https://anaconda.org/conda-forge/scikit-bio/badges/version.svg
:target: https://anaconda.org/conda-forge/scikit-bio
.. |License| image:: https://anaconda.org/conda-forge/scikit-bio/badges/license.svg
:target: https://anaconda.org/conda-forge/scikit-bio
=====================================
ci/conda_requirements.txt
=====================================
--- a/ci/conda_requirements.txt
+++ b/ci/conda_requirements.txt
@@ -3,7 +3,7 @@ pip
numpy<1.14.0
scipy
matplotlib
-pandas
+pandas<0.23.0
nose
pep8
ipython
=====================================
ci/pip_requirements.txt
=====================================
--- a/ci/pip_requirements.txt
+++ b/ci/pip_requirements.txt
@@ -7,3 +7,4 @@ Sphinx
sphinx-bootstrap-theme
numpydoc < 0.8.0
check-manifest
+hdmedians
=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+python-skbio (0.5.3-1) UNRELEASED; urgency=medium
+
+ * New upstream version
+ * Standards-Version: 4.2.0
+ TODO: Needs python-hdmedians (to be packaged)
+
+ -- Andreas Tille <tille at debian.org> Wed, 15 Aug 2018 15:21:02 +0200
+
python-skbio (0.5.2-1) unstable; urgency=medium
* New upstream version
=====================================
debian/control
=====================================
--- a/debian/control
+++ b/debian/control
@@ -27,7 +27,7 @@ Build-Depends: debhelper (>= 11~),
python3-sphinx,
python3-sphinx-bootstrap-theme,
libssw-dev
-Standards-Version: 4.1.4
+Standards-Version: 4.2.0
Vcs-Browser: https://salsa.debian.org/med-team/python-skbio
Vcs-Git: https://salsa.debian.org/med-team/python-skbio.git
Homepage: https://github.com/biocore/scikit-bio
=====================================
setup.py
=====================================
--- a/setup.py
+++ b/setup.py
@@ -120,9 +120,10 @@ setup(name='scikit-bio',
# a numpy 1.14.0 conda package on `defaults` or `conda-forge`
# channels.
'numpy >= 1.9.2, < 1.14.0',
- 'pandas >= 0.19.2',
+ 'pandas >= 0.19.2, < 0.23.0',
'scipy >= 0.15.1',
'nose >= 1.3.7',
+ 'hdmedians >= 0.13',
'scikit-learn >= 0.19.1'
],
test_suite='nose.collector',
=====================================
skbio/__init__.py
=====================================
--- a/skbio/__init__.py
+++ b/skbio/__init__.py
@@ -26,7 +26,7 @@ __all__ = ['Sequence', 'DNA', 'RNA', 'Protein', 'GeneticCode',
'TreeNode', 'nj', 'read', 'write', 'OrdinationResults']
__credits__ = "https://github.com/biocore/scikit-bio/graphs/contributors"
-__version__ = "0.5.2"
+__version__ = "0.5.3"
mottos = [
# 03/15/2014
=====================================
skbio/alignment/_pairwise.py
=====================================
--- a/skbio/alignment/_pairwise.py
+++ b/skbio/alignment/_pairwise.py
@@ -735,7 +735,7 @@ def local_pairwise_align_ssw(sequence1, sequence2, **kwargs):
return msa, alignment.optimal_alignment_score, start_end
-@deprecated(as_of="0.4.0", until="0.5.3",
+@deprecated(as_of="0.4.0", until="0.5.4",
reason="Will be replaced by a SubstitutionMatrix class. To track "
"progress, see [#161]"
"(https://github.com/biocore/scikit-bio/issues/161).")
=====================================
skbio/diversity/_driver.py
=====================================
--- a/skbio/diversity/_driver.py
+++ b/skbio/diversity/_driver.py
@@ -182,7 +182,7 @@ def alpha_diversity(metric, counts, ids=None, validate=True, **kwargs):
return pd.Series(results, index=ids)
-@deprecated(as_of='0.5.0', until='0.5.3',
+@deprecated(as_of='0.5.0', until='0.6.0',
reason=('The return type is unstable. Developer caution is '
'advised. The resulting DistanceMatrix object will '
'include zeros when distance has not been calculated, and '
=====================================
skbio/diversity/_util.py
=====================================
--- a/skbio/diversity/_util.py
+++ b/skbio/diversity/_util.py
@@ -21,10 +21,8 @@ def _validate_counts_vector(counts, suppress_cast=False):
"""
counts = np.asarray(counts)
-
- if not suppress_cast:
- counts = counts.astype(int, casting='safe', copy=False)
-
+ if not np.all(np.isreal(counts)):
+ raise ValueError("Counts vector must contain real-valued entries.")
if counts.ndim != 1:
raise ValueError("Only 1-D vectors are supported.")
elif (counts < 0).any():
=====================================
skbio/diversity/tests/test_util.py
=====================================
--- a/skbio/diversity/tests/test_util.py
+++ b/skbio/diversity/tests/test_util.py
@@ -57,9 +57,6 @@ class ValidationTests(TestCase):
self.assertEqual(obs.dtype, int)
def test_validate_counts_vector_invalid_input(self):
- # wrong dtype
- with self.assertRaises(TypeError):
- _validate_counts_vector([0, 2, 1.2, 3])
# wrong number of dimensions (2-D)
with self.assertRaises(ValueError):
@@ -73,6 +70,10 @@ class ValidationTests(TestCase):
with self.assertRaises(ValueError):
_validate_counts_vector([0, 0, 2, -1, 3])
+ # strings
+ with self.assertRaises(ValueError):
+ _validate_counts_vector([0, 0, 'a', -1, 3])
+
def test_validate_counts_matrix(self):
# basic valid input (n=2)
obs = _validate_counts_matrix([[0, 1, 1, 0, 2],
@@ -101,8 +102,6 @@ class ValidationTests(TestCase):
npt.assert_array_equal(obs[1], np.array([42.2, 42.1, 1.0]))
self.assertEqual(obs[0].dtype, float)
self.assertEqual(obs[1].dtype, float)
- with self.assertRaises(TypeError):
- _validate_counts_matrix([[0.0], [1]], suppress_cast=False)
def test_validate_counts_matrix_negative_counts(self):
with self.assertRaises(ValueError):
=====================================
skbio/sequence/_grammared_sequence.py
=====================================
--- a/skbio/sequence/_grammared_sequence.py
+++ b/skbio/sequence/_grammared_sequence.py
@@ -281,7 +281,7 @@ class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta):
return set(cls.degenerate_map)
@classproperty
- @deprecated(as_of='0.5.0', until='0.5.3',
+ @deprecated(as_of='0.5.0', until='0.6.0',
reason='Renamed to definite_chars')
def nondegenerate_chars(cls):
"""Return non-degenerate characters.
@@ -492,7 +492,7 @@ class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta):
"""
return np.in1d(self._bytes, self._definite_char_codes)
- @deprecated(as_of='0.5.0', until='0.5.3',
+ @deprecated(as_of='0.5.0', until='0.6.0',
reason='Renamed to definites')
def nondegenerates(self):
"""Find positions containing non-degenerate characters in the sequence.
@@ -548,7 +548,7 @@ class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta):
# TODO: cache results
return bool(self.definites().any())
- @deprecated(as_of='0.5.0', until='0.5.3',
+ @deprecated(as_of='0.5.0', until='0.6.0',
reason='Renamed to has_definites')
def has_nondegenerates(self):
"""Determine if sequence contains one or more non-degenerate characters
=====================================
skbio/stats/distance/__init__.py
=====================================
--- a/skbio/stats/distance/__init__.py
+++ b/skbio/stats/distance/__init__.py
@@ -154,6 +154,7 @@ Categorical Variable Stats
anosim
permanova
+ permdisp
Continuous Variable Stats
^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -196,9 +197,10 @@ from ._bioenv import bioenv
from ._anosim import anosim
from ._permanova import permanova
from ._mantel import mantel, pwmantel
+from ._permdisp import permdisp
__all__ = ['DissimilarityMatrixError', 'DistanceMatrixError', 'MissingIDError',
'DissimilarityMatrix', 'DistanceMatrix', 'randdm', 'anosim',
- 'permanova', 'bioenv', 'mantel', 'pwmantel']
+ 'permanova', 'bioenv', 'mantel', 'pwmantel', 'permdisp']
test = TestRunner(__file__).test
=====================================
skbio/stats/distance/_permdisp.py
=====================================
--- /dev/null
+++ b/skbio/stats/distance/_permdisp.py
@@ -0,0 +1,266 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2013--, scikit-bio development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from functools import partial
+
+import numpy as np
+from scipy.stats import f_oneway
+from scipy.spatial.distance import cdist
+
+import hdmedians as hd
+
+from ._base import (_preprocess_input, _run_monte_carlo_stats, _build_results)
+
+from skbio.stats.ordination import pcoa
+from skbio.util._decorator import experimental
+
+
+@experimental(as_of="0.5.2")
+def permdisp(distance_matrix, grouping, column=None, test='median',
+ permutations=999):
+ """Test for Homogeneity of Multivariate Groups Dispersions using Marti
+ Anderson's PERMDISP2 procedure.
+
+ PERMDISP is a multivariate analogue of Levene's test for homogeneity of
+ multivariate variances. Distances are handled by reducing the
+ original distances to principal coordinates. PERMDISP calculates an
+ F-statistic to assess whether group dispersions differ significantly.
+
+
+ Parameters
+ ----------
+ distance_matrix : DistanceMatrix
+ Distance matrix containing distances between objects (e.g., distances
+ between samples of microbial communities).
+ grouping : 1-D array_like or pandas.DataFrame
+ Vector indicating the assignment of objects to groups. For example,
+ these could be strings or integers denoting which group an object
+ belongs to. If `grouping` is 1-D ``array_like``, it must be the same
+ length and in the same order as the objects in `distance_matrix`. If
+ `grouping` is a ``DataFrame``, the column specified by `column` will be
+ used as the grouping vector. The ``DataFrame`` must be indexed by the
+ IDs in `distance_matrix` (i.e., the row labels must be distance matrix
+ IDs), but the order of IDs between `distance_matrix` and the
+ ``DataFrame`` need not be the same. All IDs in the distance matrix must
+ be present in the ``DataFrame``. Extra IDs in the ``DataFrame`` are
+ allowed (they are ignored in the calculations).
+ column : str, optional
+ Column name to use as the grouping vector if `grouping` is a
+ ``DataFrame``. Must be provided if `grouping` is a ``DataFrame``.
+ Cannot be provided if `grouping` is 1-D ``array_like``.
+ test : {'centroid', 'median'}
+ Determines whether the analysis is done using centroid or spatial
+ median.
+ permutations : int, optional
+ Number of permutations to use when assessing statistical
+ significance. Must be greater than or equal to zero. If zero,
+ statistical significance calculations will be skipped and the p-value
+ will be ``np.nan``.
+
+ Returns
+ -------
+ pandas.Series
+ Results of the statistical test, including ``test statistic`` and
+ ``p-value``.
+
+ Raises
+ ------
+ TypeError
+ If, when using the spatial median test, the pcoa ordination is not of
+ type np.float32 or np.float64, the spatial median function will fail
+ and the centroid test should be used instead
+ ValueError
+ If the test is not centroid or median.
+ TypeError
+ If the distance matrix is not an instance of a
+ ``skbio.DistanceMatrix``.
+ ValueError
+ If there is only one group
+ ValueError
+ If a list and a column name are both provided
+ ValueError
+ If a list is provided for `grouping` and its length does not match
+ the number of ids in distance_matrix
+ ValueError
+ If all of the values in the grouping vector are unique
+ KeyError
+ If there are ids in grouping that are not in distance_matrix
+
+ See Also
+ --------
+ permanova
+ anosim
+
+ Notes
+ -----
+ The significance of the results from this function will be the same as the
+ results found in vegan's betadisper, however due to floating point
+ variability the F-statistic results may vary slightly.
+
+ See [1]_ for the original method reference, as well as
+ ``vegan::betadisper``, available in R's vegan package [2]_.
+
+ References
+ ----------
+ .. [1] Anderson, Marti J. "Distance-Based Tests for Homogeneity of
+ Multivariate Dispersions." Biometrics 62 (2006):245-253
+
+ .. [2] http://cran.r-project.org/web/packages/vegan/index.html
+
+ Examples
+ --------
+ Load a 6x6 distance matrix and grouping vector denoting 2 groups of
+ objects:
+
+ >>> from skbio import DistanceMatrix
+ >>> dm = DistanceMatrix([[0, 0.5, 0.75, 1, 0.66, 0.33],
+ ... [0.5, 0, 0.25, 0.33, 0.77, 0.61],
+ ... [0.75, 0.25, 0, 0.1, 0.44, 0.55],
+ ... [1, 0.33, 0.1, 0, 0.75, 0.88],
+ ... [0.66, 0.77, 0.44, 0.75, 0, 0.77],
+ ... [0.33, 0.61, 0.55, 0.88, 0.77, 0]],
+ ... ['s1', 's2', 's3', 's4', 's5', 's6'])
+ >>> grouping = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
+
+ Run PERMDISP using 99 permutations to calculate the p-value:
+
+ >>> from skbio.stats.distance import permdisp
+ >>> import numpy as np
+ >>> #make output deterministic, should not be included during normal use
+ >>> np.random.seed(0)
+ >>> permdisp(dm, grouping, permutations=99)
+ method name PERMDISP
+ test statistic name F-value
+ sample size 6
+ number of groups 2
+ test statistic 1.03296
+ p-value 0.35
+ number of permutations 99
+ Name: PERMDISP results, dtype: object
+
+ The return value is a ``pandas.Series`` object containing the results of
+ the statistical test.
+
+ To suppress calculation of the p-value and only obtain the F statistic,
+ specify zero permutations:
+
+ >>> permdisp(dm, grouping, permutations=0)
+ method name PERMDISP
+ test statistic name F-value
+ sample size 6
+ number of groups 2
+ test statistic 1.03296
+ p-value NaN
+ number of permutations 0
+ Name: PERMDISP results, dtype: object
+
+ PERMDISP computes variances based on two types of tests, using either
+ centroids or spatial medians, also commonly referred to as a geometric
+ median. The spatial median is thought to yield a more robust test
+ statistic, and this test is used by default. Spatial medians are computed
+ using an iterative algorithm to find the optimally minimum point from all
+ other points in a group while centroids are computed using a deterministic
+ formula. As such the two different tests yield slightly different F
+ statistics.
+
+ >>> np.random.seed(0)
+ >>> permdisp(dm, grouping, test='centroid', permutations=99)
+ method name PERMDISP
+ test statistic name F-value
+ sample size 6
+ number of groups 2
+ test statistic 3.67082
+ p-value 0.29
+ number of permutations 99
+ Name: PERMDISP results, dtype: object
+
+ You can also provide a ``pandas.DataFrame`` and a column denoting the
+ grouping instead of a grouping vector. The following DataFrame's
+ Grouping column specifies the same grouping as the vector we used in the
+ previous examples:
+ >>> import pandas as pd
+ >>> df = pd.DataFrame.from_dict(
+ ... {'Grouping': {'s1': 'G1', 's2': 'G1', 's3': 'G1', 's4': 'G2',
+ ... 's5': 'G2', 's6': 'G2'}})
+ >>> # make output deterministic; should not be included during normal use
+ >>> np.random.seed(0)
+ >>> permdisp(dm, df, 'Grouping', permutations=99, test='centroid')
+ method name PERMDISP
+ test statistic name F-value
+ sample size 6
+ number of groups 2
+ test statistic 3.67082
+ p-value 0.29
+ number of permutations 99
+ Name: PERMDISP results, dtype: object
+
+ Note that when providing a ``DataFrame``, the ordering of rows and/or
+ columns does not affect the grouping vector that is extracted. The
+ ``DataFrame`` must be indexed by the distance matrix IDs (i.e., the row
+ labels must be distance matrix IDs).
+
+ If IDs (rows) are present in the ``DataFrame`` but not in the distance
+ matrix, they are ignored. For example, if the ``DataFrame`` above had an
+ extra ``s7`` row (7 objects in total), only the 6 objects in the distance
+ matrix would be used in the test (see the "sample size" row in the
+ results above). Thus, the ``DataFrame`` can be a superset of the distance
+ matrix IDs. Note that the reverse is not true: IDs in the distance matrix
+ *must* be present in the ``DataFrame`` or an error will be raised.
+
+ PERMDISP should be used to determine whether the dispersions between the
+ groups in your distance matrix are significantly separated.
+ A non-significant test result indicates that group dispersions are similar
+ to each other. PERMANOVA or ANOSIM should then be used in conjunction to
+ determine whether clustering within groups is significant.
+
+ """
+ if test not in ['centroid', 'median']:
+ raise ValueError('Test must be centroid or median')
+
+ ordination = pcoa(distance_matrix)
+ samples = ordination.samples
+
+ sample_size, num_groups, grouping, tri_idxs, distances = _preprocess_input(
+ distance_matrix, grouping, column)
+
+ test_stat_function = partial(_compute_groups, samples, test)
+
+ stat, p_value = _run_monte_carlo_stats(test_stat_function, grouping,
+ permutations)
+
+ return _build_results('PERMDISP', 'F-value', sample_size, num_groups,
+ stat, p_value, permutations)
+
+
+def _compute_groups(samples, test_type, grouping):
+
+ groups = []
+
+ samples['grouping'] = grouping
+ if test_type == 'centroid':
+ centroids = samples.groupby('grouping').aggregate('mean')
+ elif test_type == 'median':
+ centroids = samples.groupby('grouping').aggregate(_config_med)
+
+ for label, df in samples.groupby('grouping'):
+ groups.append(cdist(df.values[:, :-1], [centroids.loc[label].values],
+ metric='euclidean'))
+
+ stat, _ = f_oneway(*groups)
+ stat = stat[0]
+
+ return stat
+
+
+def _config_med(x):
+ """
+ slice the vector up to the last value to exclude grouping column
+ and transpose the vector to be compatible with hd.geomedian
+ """
+ X = x.values[:, :-1]
+ return np.array(hd.geomedian(X.T))
=====================================
skbio/stats/distance/tests/data/moving_pictures_dm.tsv
=====================================
--- /dev/null
+++ b/skbio/stats/distance/tests/data/moving_pictures_dm.tsv
@@ -0,0 +1,34 @@
+ L1S105 L1S140 L1S208 L1S257 L1S281 L1S57 L1S76 L1S8 L2S155 L2S175 L2S204 L2S222 L2S240 L2S309 L2S357 L2S382 L3S294 L3S313 L3S341 L3S360 L3S378 L4S112 L4S137 L4S63 L5S104 L5S155 L5S174 L5S203 L5S222 L5S240 L6S20 L6S68 L6S93
+L1S105 0.0 0.5390370298457626 0.5958380256061412 0.5768343811669847 0.5923046539353664 0.44786237128294343 0.39489596582130626 0.5545584734774127 0.8628589351714164 0.8434498336351259 0.8592370856547886 0.8506877032999233 0.7889826076831608 0.8359411856255106 0.8200856211804968 0.8426252468809847 0.8588300792888636 0.7569476110839489 0.7737531468949157 0.8944159171736104 0.6033584308249973 0.8470795512030315 0.821242904747876 0.8652874911410461 0.7549182333398126 0.7223305078861331 0.7614916771466183 0.7384823167933402 0.7685949288454377 0.7217074962665071 0.7245343115615573 0.7830441925323288 0.7712136041691006
+L1S140 0.5390370298457626 0.0 0.5799280564700269 0.5498337267462643 0.5487499420209432 0.5617578336407074 0.5274803712970444 0.5842935186320399 0.8425845430236847 0.7832827608957819 0.832335123190656 0.83707961792672 0.7311087382394217 0.7969572429804197 0.74931933992796 0.791784337383268 0.7891927506641839 0.6721331780898249 0.7493321619468443 0.8773425730913526 0.3943371737754427 0.8380071824429106 0.7797415033649835 0.8604626784828243 0.7389385379102201 0.713927118153038 0.7409814677579339 0.6893827188871452 0.739005471405395 0.7134211109446568 0.7210216228178491 0.7548606893628177 0.75650714031249
+L1S208 0.5958380256061412 0.5799280564700269 0.0 0.3514478448812178 0.3099429863936709 0.4908395118886828 0.5060147104062029 0.5921114631534051 0.8899108348691316 0.871025922389371 0.882455510304791 0.8798738374168472 0.8153291321982421 0.8616764262370401 0.8215036618481708 0.8762874977082885 0.8826136551026731 0.8218942402038897 0.8270103157643566 0.8918833194204532 0.5595063305032598 0.8896853519676307 0.8701383470494592 0.9064920956741176 0.8245445313262268 0.7952347925702751 0.8252913142216206 0.7959214036381839 0.8153225533087411 0.7836084181624914 0.791415482862234 0.8107398191951546 0.8044123679011221
+L1S257 0.5768343811669847 0.5498337267462643 0.3514478448812178 0.0 0.25929060246530955 0.47255177559300116 0.43222880359346527 0.5884066995223238 0.8795468292082718 0.8633179655119932 0.8688146996880899 0.8629186587174017 0.8190468684000036 0.8492352934610714 0.8120267566814204 0.8666693265827448 0.8777562843225813 0.802227485419459 0.8160964503323095 0.8817724113978568 0.5576854601824073 0.8747926182003918 0.858838497674028 0.8925129391873841 0.828703636427477 0.8010491792499632 0.8063815782189153 0.8092051698995067 0.806495056831578 0.8032210970007331 0.7958200880953574 0.8161661389998109 0.8303929119421523
+L1S281 0.5923046539353664 0.5487499420209432 0.3099429863936709 0.25929060246530955 0.0 0.4609450181065116 0.4768241494226335 0.6336173247999576 0.8985838283907426 0.8902364311520621 0.8972881153423283 0.8878063096662511 0.842173339458122 0.8796280371215733 0.8476576569873849 0.8898292712501708 0.9039077039484619 0.8485533231569067 0.852633569868443 0.9096947429872373 0.5498250250306412 0.9022662022069642 0.8832755726058907 0.9169203743757105 0.8572110146660553 0.833803405339537 0.8394041604332061 0.8290963638641423 0.8384368165385457 0.8247152739488143 0.832089497492543 0.8365271234666928 0.8483699050146564
+L1S57 0.44786237128294343 0.5617578336407074 0.4908395118886828 0.47255177559300116 0.4609450181065116 0.0 0.31082469034707255 0.5024096303529431 0.9113813328862811 0.9014185555334876 0.9014865706960162 0.8945638042848107 0.8714620677099987 0.8806337056845028 0.8481649374200068 0.9084204945482022 0.913489362275528 0.852617211132442 0.8535720478866394 0.9122868711391121 0.5758415360633006 0.9020100495580599 0.9027332494336695 0.9186219021297435 0.853072903040757 0.8327332298901452 0.8629325424179554 0.838123751985925 0.8659733546560768 0.8346642658091125 0.8289313199906942 0.8703637908123152 0.8629123642673379
+L1S76 0.39489596582130626 0.5274803712970444 0.5060147104062029 0.43222880359346527 0.4768241494226335 0.31082469034707255 0.0 0.4003824579445799 0.894830907945713 0.8782305450289447 0.8768848673183877 0.8694496317335333 0.834400640278251 0.8645838229241534 0.8020621529524625 0.8821339436331491 0.8821873080715426 0.8108074314407202 0.8071422086314133 0.8931657453933083 0.5741407822604754 0.8800714967479438 0.8737823256177506 0.8981103792379882 0.8193019560519106 0.7921706586389919 0.8200532532128909 0.8020963162128805 0.8249980087726344 0.7949183972892307 0.7923436575946494 0.8310029142646927 0.8322683023877507
+L1S8 0.5545584734774127 0.5842935186320399 0.5921114631534051 0.5884066995223238 0.6336173247999576 0.5024096303529431 0.4003824579445799 0.0 0.8984532053428095 0.8752689122693623 0.892740827989152 0.8922691853549297 0.8314474877203453 0.8600974905243383 0.8016996748131581 0.8870353495912263 0.8805255216909961 0.7980602019240324 0.829887986255906 0.8798293357284456 0.5576624158659516 0.8948846831679798 0.8816102418936791 0.9112039786090779 0.8148374423400107 0.7791385980867408 0.8123406708143702 0.8036845403456274 0.8182429195392156 0.786233773105675 0.785902469716853 0.8323093879065956 0.8268942255033314
+L2S155 0.8628589351714164 0.8425845430236847 0.8899108348691316 0.8795468292082718 0.8985838283907426 0.9113813328862811 0.894830907945713 0.8984532053428095 0.0 0.46760603559570263 0.4249569948218847 0.43461957349574365 0.7136184283615585 0.3913207521607729 0.6347075539715223 0.7018474049205657 0.48529937944992957 0.6445967049513042 0.6384694268897185 0.4614095269789821 0.8793414820335794 0.49283220104466596 0.6490593420832039 0.47563246735903575 0.7320469060900457 0.8046481081885503 0.7992253539435805 0.7608659584888517 0.761349180972591 0.8017496796623877 0.7844856344015433 0.7648448360194874 0.7728251640763542
+L2S175 0.8434498336351259 0.7832827608957819 0.871025922389371 0.8633179655119932 0.8902364311520621 0.9014185555334876 0.8782305450289447 0.8752689122693623 0.46760603559570263 0.0 0.4521351118370733 0.47450759266616915 0.6496968752812166 0.4163459132801876 0.5642253392315422 0.6035203651725982 0.4127343227632307 0.5009839698168453 0.5720541940600513 0.5238301647807445 0.8519601477875526 0.45057960516670714 0.6012619962408408 0.5452699437274425 0.7004211741288418 0.7254489332170817 0.7241658196524235 0.6728749977683892 0.7120770444662118 0.7067601324302817 0.7154192599371452 0.6790044090495644 0.6670904831025087
+L2S204 0.8592370856547886 0.832335123190656 0.882455510304791 0.8688146996880899 0.8972881153423283 0.9014865706960162 0.8768848673183877 0.892740827989152 0.4249569948218847 0.4521351118370733 0.0 0.34707557932356076 0.7317461021420034 0.32542423370295653 0.5940997497062588 0.7202722055635978 0.46284470929748595 0.6399875240134363 0.5427529698531464 0.41546588368326215 0.8806954039760829 0.3819763193450203 0.6864606389600179 0.4541251117472289 0.7433650255074369 0.8140552153241122 0.8171702240197111 0.7787482342908187 0.7789238492726243 0.8145023991202386 0.797521853405542 0.7823436739920101 0.7825923091798144
+L2S222 0.8506877032999233 0.83707961792672 0.8798738374168472 0.8629186587174017 0.8878063096662511 0.8945638042848107 0.8694496317335333 0.8922691853549297 0.43461957349574365 0.47450759266616915 0.34707557932356076 0.0 0.7373605415524003 0.38933892308526385 0.6131841736075622 0.7071347655059155 0.5297960404918108 0.6776613871707217 0.614146849517274 0.41910689292075193 0.8807526952301693 0.43694693261646017 0.6992608535946145 0.4512186030792239 0.7657709194380475 0.8264506476297601 0.8235579667989716 0.7929926795664424 0.7900065630791773 0.8272758828816869 0.8177374352486667 0.7787324503994212 0.7937870099703493
+L2S240 0.7889826076831608 0.7311087382394217 0.8153291321982421 0.8190468684000036 0.842173339458122 0.8714620677099987 0.834400640278251 0.8314474877203453 0.7136184283615585 0.6496968752812166 0.7317461021420034 0.7373605415524003 0.0 0.7032990730427625 0.5742088858732407 0.5358811200052477 0.6113293724148627 0.5506881270264428 0.5515051266923414 0.7398454298135383 0.7579742017535656 0.7115018729258218 0.48083389190461734 0.7661067183222632 0.36560695464990867 0.4412731503394013 0.41030162605879844 0.38926922412434417 0.3758838249617733 0.43476390441875573 0.4013288162093646 0.29292532554452677 0.35832282922330977
+L2S309 0.8359411856255106 0.7969572429804197 0.8616764262370401 0.8492352934610714 0.8796280371215733 0.8806337056845028 0.8645838229241534 0.8600974905243383 0.3913207521607729 0.4163459132801876 0.32542423370295653 0.38933892308526385 0.7032990730427625 0.0 0.5284516343093534 0.7093129471508985 0.41393505549147774 0.6241411216254367 0.5766719921218544 0.36061963338100667 0.8395112062787546 0.4200940359303599 0.6691543813483889 0.43557544512779306 0.7065436739270591 0.7784024482700062 0.7846395088210678 0.7393917740028452 0.7397915081982174 0.7798934785055419 0.7729246906860544 0.763695527435426 0.7629847033876873
+L2S357 0.8200856211804968 0.74931933992796 0.8215036618481708 0.8120267566814204 0.8476576569873849 0.8481649374200068 0.8020621529524625 0.8016996748131581 0.6347075539715223 0.5642253392315422 0.5940997497062588 0.6131841736075622 0.5742088858732407 0.5284516343093534 0.0 0.6247944934502809 0.5283785180280727 0.44838682641972477 0.375128958725295 0.6121711834124891 0.8188660070261687 0.5950936496526126 0.542689027968325 0.6689211737896941 0.5421787179552776 0.6601284562072681 0.6793754491062178 0.5918194682678256 0.6055221091296257 0.6631138650570884 0.6559008959245738 0.6414849884779625 0.6348752854444591
+L2S382 0.8426252468809847 0.791784337383268 0.8762874977082885 0.8666693265827448 0.8898292712501708 0.9084204945482022 0.8821339436331491 0.8870353495912263 0.7018474049205657 0.6035203651725982 0.7202722055635978 0.7071347655059155 0.5358811200052477 0.7093129471508985 0.6247944934502809 0.0 0.680488155634992 0.5879411676849041 0.6300513986721148 0.7798117418893309 0.8451713307652351 0.7053126728281367 0.29452174278276233 0.7387926847081081 0.6760422527308095 0.6989286770361658 0.6126816214335168 0.6413026519516322 0.6319640587005974 0.6916898221776602 0.6638186733377722 0.5262410574638753 0.5609030584455355
+L3S294 0.8588300792888636 0.7891927506641839 0.8826136551026731 0.8777562843225813 0.9039077039484619 0.913489362275528 0.8821873080715426 0.8805255216909961 0.48529937944992957 0.4127343227632307 0.46284470929748595 0.5297960404918108 0.6113293724148627 0.41393505549147774 0.5283785180280727 0.680488155634992 0.0 0.5793151362285137 0.532519942057993 0.4417588676636619 0.8329083701065813 0.5166391190254049 0.6344485180123575 0.5880518426822319 0.6166977936514635 0.6976139122442497 0.6965206807593246 0.6533170380479183 0.6465722615751633 0.7007111257543401 0.697083374921763 0.7001623873009802 0.7062166839383266
+L3S313 0.7569476110839489 0.6721331780898249 0.8218942402038897 0.802227485419459 0.8485533231569067 0.852617211132442 0.8108074314407202 0.7980602019240324 0.6445967049513042 0.5009839698168453 0.6399875240134363 0.6776613871707217 0.5506881270264428 0.6241411216254367 0.44838682641972477 0.5879411676849041 0.5793151362285137 0.0 0.45248307788083786 0.7165487737423945 0.76582414397238 0.6574105445715698 0.560831971453598 0.7171733441832148 0.5902731094836756 0.5532059333861852 0.5853434250505234 0.5025483834097694 0.561609978051379 0.5582681387584417 0.5694158492427435 0.6032070350377756 0.5911945286328084
+L3S341 0.7737531468949157 0.7493321619468443 0.8270103157643566 0.8160964503323095 0.852633569868443 0.8535720478866394 0.8071422086314133 0.829887986255906 0.6384694268897185 0.5720541940600513 0.5427529698531464 0.614146849517274 0.5515051266923414 0.5766719921218544 0.375128958725295 0.6300513986721148 0.532519942057993 0.45248307788083786 0.0 0.6196019964860159 0.783720015310488 0.5829777947168483 0.5534737998316789 0.6520900532121093 0.5493281969319693 0.6308837007391367 0.652197593637747 0.6025364874432444 0.6084801413450426 0.6341899420458715 0.6354966927534835 0.6495259871270996 0.6352447858014029
+L3S360 0.8944159171736104 0.8773425730913526 0.8918833194204532 0.8817724113978568 0.9096947429872373 0.9122868711391121 0.8931657453933083 0.8798293357284456 0.4614095269789821 0.5238301647807445 0.41546588368326215 0.41910689292075193 0.7398454298135383 0.36061963338100667 0.6121711834124891 0.7798117418893309 0.4417588676636619 0.7165487737423945 0.6196019964860159 0.0 0.886859306656774 0.5343546555847905 0.7414678143333879 0.5400460290177217 0.7495782488980336 0.8108767861286648 0.8149762338575348 0.7802652073770237 0.7846158292767554 0.81147352370555 0.8005244117641637 0.8006143679532055 0.8014989491687546
+L3S378 0.6033584308249973 0.3943371737754427 0.5595063305032598 0.5576854601824073 0.5498250250306412 0.5758415360633006 0.5741407822604754 0.5576624158659516 0.8793414820335794 0.8519601477875526 0.8806954039760829 0.8807526952301693 0.7579742017535656 0.8395112062787546 0.8188660070261687 0.8451713307652351 0.8329083701065813 0.76582414397238 0.783720015310488 0.886859306656774 0.0 0.8878531310256906 0.8359164049140062 0.8993931924960518 0.7252856380797102 0.6746582064114732 0.7140756986649465 0.7000095154033142 0.7254146177064372 0.6747284675539041 0.694536665024702 0.750964397649065 0.7478762997708954
+L4S112 0.8470795512030315 0.8380071824429106 0.8896853519676307 0.8747926182003918 0.9022662022069642 0.9020100495580599 0.8800714967479438 0.8948846831679798 0.49283220104466596 0.45057960516670714 0.3819763193450203 0.43694693261646017 0.7115018729258218 0.4200940359303599 0.5950936496526126 0.7053126728281367 0.5166391190254049 0.6574105445715698 0.5829777947168483 0.5343546555847905 0.8878531310256906 0.0 0.6481421375149321 0.3575409039402891 0.7357714051501275 0.809583225542944 0.8029186032245896 0.7654724305797693 0.7692411532770428 0.8100058186769521 0.7895581361870725 0.7651442703099545 0.7646440275794989
+L4S137 0.821242904747876 0.7797415033649835 0.8701383470494592 0.858838497674028 0.8832755726058907 0.9027332494336695 0.8737823256177506 0.8816102418936791 0.6490593420832039 0.6012619962408408 0.6864606389600179 0.6992608535946145 0.48083389190461734 0.6691543813483889 0.542689027968325 0.29452174278276233 0.6344485180123575 0.560831971453598 0.5534737998316789 0.7414678143333879 0.8359164049140062 0.6481421375149321 0.0 0.6908078779515394 0.6039667084060777 0.680281107717286 0.5936509777076375 0.6119432909211573 0.6028821638318781 0.6703353838958876 0.6453016109359295 0.5078212308619812 0.5169165993321925
+L4S63 0.8652874911410461 0.8604626784828243 0.9064920956741176 0.8925129391873841 0.9169203743757105 0.9186219021297435 0.8981103792379882 0.9112039786090779 0.47563246735903575 0.5452699437274425 0.4541251117472289 0.4512186030792239 0.7661067183222632 0.43557544512779306 0.6689211737896941 0.7387926847081081 0.5880518426822319 0.7171733441832148 0.6520900532121093 0.5400460290177217 0.8993931924960518 0.3575409039402891 0.6908078779515394 0.0 0.7771055341254081 0.8401318632573123 0.8278791663557197 0.8033319145222341 0.8021181534933208 0.8397629806795364 0.8174036603850671 0.8034029168891261 0.8047837943956528
+L5S104 0.7549182333398126 0.7389385379102201 0.8245445313262268 0.828703636427477 0.8572110146660553 0.853072903040757 0.8193019560519106 0.8148374423400107 0.7320469060900457 0.7004211741288418 0.7433650255074369 0.7657709194380475 0.36560695464990867 0.7065436739270591 0.5421787179552776 0.6760422527308095 0.6166977936514635 0.5902731094836756 0.5493281969319693 0.7495782488980336 0.7252856380797102 0.7357714051501275 0.6039667084060777 0.7771055341254081 0.0 0.299746565246475 0.3717459070188454 0.30611431267503736 0.2687293485232606 0.29488133279702533 0.35282575882032935 0.4059272692757093 0.3546188970208585
+L5S155 0.7223305078861331 0.713927118153038 0.7952347925702751 0.8010491792499632 0.833803405339537 0.8327332298901452 0.7921706586389919 0.7791385980867408 0.8046481081885503 0.7254489332170817 0.8140552153241122 0.8264506476297601 0.4412731503394013 0.7784024482700062 0.6601284562072681 0.6989286770361658 0.6976139122442497 0.5532059333861852 0.6308837007391367 0.8108767861286648 0.6746582064114732 0.809583225542944 0.680281107717286 0.8401318632573123 0.299746565246475 0.0 0.24482970831325948 0.21540954545570457 0.28453959248410193 0.08866970541518727 0.2086655812403446 0.42817224711244256 0.35580061290923976
+L5S174 0.7614916771466183 0.7409814677579339 0.8252913142216206 0.8063815782189153 0.8394041604332061 0.8629325424179554 0.8200532532128909 0.8123406708143702 0.7992253539435805 0.7241658196524235 0.8171702240197111 0.8235579667989716 0.41030162605879844 0.7846395088210678 0.6793754491062178 0.6126816214335168 0.6965206807593246 0.5853434250505234 0.652197593637747 0.8149762338575348 0.7140756986649465 0.8029186032245896 0.5936509777076375 0.8278791663557197 0.3717459070188454 0.24482970831325948 0.0 0.2877214934590952 0.25347139824912546 0.2605730633285677 0.2559815010487754 0.27892381163313434 0.3053187587048309
+L5S203 0.7384823167933402 0.6893827188871452 0.7959214036381839 0.8092051698995067 0.8290963638641423 0.838123751985925 0.8020963162128805 0.8036845403456274 0.7608659584888517 0.6728749977683892 0.7787482342908187 0.7929926795664424 0.38926922412434417 0.7393917740028452 0.5918194682678256 0.6413026519516322 0.6533170380479183 0.5025483834097694 0.6025364874432444 0.7802652073770237 0.7000095154033142 0.7654724305797693 0.6119432909211573 0.8033319145222341 0.30611431267503736 0.21540954545570457 0.2877214934590952 0.0 0.24311981207597314 0.2201431600708006 0.2346438472041132 0.333150002145841 0.2850632669863039
+L5S222 0.7685949288454377 0.739005471405395 0.8153225533087411 0.806495056831578 0.8384368165385457 0.8659733546560768 0.8249980087726344 0.8182429195392156 0.761349180972591 0.7120770444662118 0.7789238492726243 0.7900065630791773 0.3758838249617733 0.7397915081982174 0.6055221091296257 0.6319640587005974 0.6465722615751633 0.561609978051379 0.6084801413450426 0.7846158292767554 0.7254146177064372 0.7692411532770428 0.6028821638318781 0.8021181534933208 0.2687293485232606 0.28453959248410193 0.25347139824912546 0.24311981207597314 0.0 0.31116489519567736 0.30117438587754664 0.29331724421227606 0.2904033216874486
+L5S240 0.7217074962665071 0.7134211109446568 0.7836084181624914 0.8032210970007331 0.8247152739488143 0.8346642658091125 0.7949183972892307 0.786233773105675 0.8017496796623877 0.7067601324302817 0.8145023991202386 0.8272758828816869 0.43476390441875573 0.7798934785055419 0.6631138650570884 0.6916898221776602 0.7007111257543401 0.5582681387584417 0.6341899420458715 0.81147352370555 0.6747284675539041 0.8100058186769521 0.6703353838958876 0.8397629806795364 0.29488133279702533 0.08866970541518727 0.2605730633285677 0.2201431600708006 0.31116489519567736 0.0 0.21735399370958108 0.3819989872862365 0.3241682922704345
+L6S20 0.7245343115615573 0.7210216228178491 0.791415482862234 0.7958200880953574 0.832089497492543 0.8289313199906942 0.7923436575946494 0.785902469716853 0.7844856344015433 0.7154192599371452 0.797521853405542 0.8177374352486667 0.4013288162093646 0.7729246906860544 0.6559008959245738 0.6638186733377722 0.697083374921763 0.5694158492427435 0.6354966927534835 0.8005244117641637 0.694536665024702 0.7895581361870725 0.6453016109359295 0.8174036603850671 0.35282575882032935 0.2086655812403446 0.2559815010487754 0.2346438472041132 0.30117438587754664 0.21735399370958108 0.0 0.36633132081192926 0.3477206555495855
+L6S68 0.7830441925323288 0.7548606893628177 0.8107398191951546 0.8161661389998109 0.8365271234666928 0.8703637908123152 0.8310029142646927 0.8323093879065956 0.7648448360194874 0.6790044090495644 0.7823436739920101 0.7787324503994212 0.29292532554452677 0.763695527435426 0.6414849884779625 0.5262410574638753 0.7001623873009802 0.6032070350377756 0.6495259871270996 0.8006143679532055 0.750964397649065 0.7651442703099545 0.5078212308619812 0.8034029168891261 0.4059272692757093 0.42817224711244256 0.27892381163313434 0.333150002145841 0.29331724421227606 0.3819989872862365 0.36633132081192926 0.0 0.19042209104190255
+L6S93 0.7712136041691006 0.75650714031249 0.8044123679011221 0.8303929119421523 0.8483699050146564 0.8629123642673379 0.8322683023877507 0.8268942255033314 0.7728251640763542 0.6670904831025087 0.7825923091798144 0.7937870099703493 0.35832282922330977 0.7629847033876873 0.6348752854444591 0.5609030584455355 0.7062166839383266 0.5911945286328084 0.6352447858014029 0.8014989491687546 0.7478762997708954 0.7646440275794989 0.5169165993321925 0.8047837943956528 0.3546188970208585 0.35580061290923976 0.3053187587048309 0.2850632669863039 0.2904033216874486 0.3241682922704345 0.3477206555495855 0.19042209104190255 0.0
=====================================
skbio/stats/distance/tests/data/moving_pictures_mf.tsv
=====================================
--- /dev/null
+++ b/skbio/stats/distance/tests/data/moving_pictures_mf.tsv
@@ -0,0 +1,35 @@
+#SampleID BarcodeSequence LinkerPrimerSequence BodySite Year Month Day Subject ReportedAntibioticUsage DaysSinceExperimentStart Description
+L1S8 AGCTGACTAGTC GTGCCAGCMGCCGCGGTAA gut 2008 10 28 subject-1 Yes 0 subject-1.gut.2008-10-28
+L1S57 ACACACTATGGC GTGCCAGCMGCCGCGGTAA gut 2009 1 20 subject-1 No 84 subject-1.gut.2009-1-20
+L1S76 ACTACGTGTGGT GTGCCAGCMGCCGCGGTAA gut 2009 2 17 subject-1 No 112 subject-1.gut.2009-2-17
+L1S105 AGTGCGATGCGT GTGCCAGCMGCCGCGGTAA gut 2009 3 17 subject-1 No 140 subject-1.gut.2009-3-17
+L2S155 ACGATGCGACCA GTGCCAGCMGCCGCGGTAA left palm 2009 1 20 subject-1 No 84 subject-1.left-palm.2009-1-20
+L2S175 AGCTATCCACGA GTGCCAGCMGCCGCGGTAA left palm 2009 2 17 subject-1 No 112 subject-1.left-palm.2009-2-17
+L2S204 ATGCAGCTCAGT GTGCCAGCMGCCGCGGTAA left palm 2009 3 17 subject-1 No 140 subject-1.left-palm.2009-3-17
+L2S222 CACGTGACATGT GTGCCAGCMGCCGCGGTAA left palm 2009 4 14 subject-1 No 168 subject-1.left-palm.2009-4-14
+L3S242 ACAGTTGCGCGA GTGCCAGCMGCCGCGGTAA right palm 2008 10 28 subject-1 Yes 0 subject-1.right-palm.2008-10-28
+L3S294 CACGACAGGCTA GTGCCAGCMGCCGCGGTAA right palm 2009 1 20 subject-1 No 84 subject-1.right-palm.2009-1-20
+L3S313 AGTGTCACGGTG GTGCCAGCMGCCGCGGTAA right palm 2009 2 17 subject-1 No 112 subject-1.right-palm.2009-2-17
+L3S341 CAAGTGAGAGAG GTGCCAGCMGCCGCGGTAA right palm 2009 3 17 subject-1 No 140 subject-1.right-palm.2009-3-17
+L3S360 CATCGTATCAAC GTGCCAGCMGCCGCGGTAA right palm 2009 4 14 subject-1 No 168 subject-1.right-palm.2009-4-14
+L5S104 CAGTGTCAGGAC GTGCCAGCMGCCGCGGTAA tongue 2008 10 28 subject-1 Yes 0 subject-1.tongue.2008-10-28
+L5S155 ATCTTAGACTGC GTGCCAGCMGCCGCGGTAA tongue 2009 1 20 subject-1 No 84 subject-1.tongue.2009-1-20
+L5S174 CAGACATTGCGT GTGCCAGCMGCCGCGGTAA tongue 2009 2 17 subject-1 No 112 subject-1.tongue.2009-2-17
+L5S203 CGATGCACCAGA GTGCCAGCMGCCGCGGTAA tongue 2009 3 17 subject-1 No 140 subject-1.tongue.2009-3-17
+L5S222 CTAGAGACTCTT GTGCCAGCMGCCGCGGTAA tongue 2009 4 14 subject-1 No 168 subject-1.tongue.2009-4-14
+L1S140 ATGGCAGCTCTA GTGCCAGCMGCCGCGGTAA gut 2008 10 28 subject-2 Yes 0 subject-2.gut.2008-10-28
+L1S208 CTGAGATACGCG GTGCCAGCMGCCGCGGTAA gut 2009 1 20 subject-2 No 84 subject-2.gut.2009-1-20
+L1S257 CCGACTGAGATG GTGCCAGCMGCCGCGGTAA gut 2009 3 17 subject-2 No 140 subject-2.gut.2009-3-17
+L1S281 CCTCTCGTGATC GTGCCAGCMGCCGCGGTAA gut 2009 4 14 subject-2 No 168 subject-2.gut.2009-4-14
+L2S240 CATATCGCAGTT GTGCCAGCMGCCGCGGTAA left palm 2008 10 28 subject-2 Yes 0 subject-2.left-palm.2008-10-28
+L2S309 CGTGCATTATCA GTGCCAGCMGCCGCGGTAA left palm 2009 1 20 subject-2 No 84 subject-2.left-palm.2009-1-20
+L2S357 CTAACGCAGTCA GTGCCAGCMGCCGCGGTAA left palm 2009 3 17 subject-2 No 140 subject-2.left-palm.2009-3-17
+L2S382 CTCAATGACTCA GTGCCAGCMGCCGCGGTAA left palm 2009 4 14 subject-2 No 168 subject-2.left-palm.2009-4-14
+L3S378 ATCGATCTGTGG GTGCCAGCMGCCGCGGTAA right palm 2008 10 28 subject-2 Yes 0 subject-2.right-palm.2008-10-28
+L4S63 CTCGTGGAGTAG GTGCCAGCMGCCGCGGTAA right palm 2009 1 20 subject-2 No 84 subject-2.right-palm.2009-1-20
+L4S112 GCGTTACACACA GTGCCAGCMGCCGCGGTAA right palm 2009 3 17 subject-2 No 140 subject-2.right-palm.2009-3-17
+L4S137 GAACTGTATCTC GTGCCAGCMGCCGCGGTAA right palm 2009 4 14 subject-2 No 168 subject-2.right-palm.2009-4-14
+L5S240 CTGGACTCATAG GTGCCAGCMGCCGCGGTAA tongue 2008 10 28 subject-2 Yes 0 subject-2.tongue.2008-10-28
+L6S20 GAGGCTCATCAT GTGCCAGCMGCCGCGGTAA tongue 2009 1 20 subject-2 No 84 subject-2.tongue.2009-1-20
+L6S68 GATACGTCCTGA GTGCCAGCMGCCGCGGTAA tongue 2009 3 17 subject-2 No 140 subject-2.tongue.2009-3-17
+L6S93 GATTAGCACTCT GTGCCAGCMGCCGCGGTAA tongue 2009 4 14 subject-2 No 168 subject-2.tongue.2009-4-14
\ No newline at end of file
=====================================
skbio/stats/distance/tests/test_permdisp.py
=====================================
--- /dev/null
+++ b/skbio/stats/distance/tests/test_permdisp.py
@@ -0,0 +1,253 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2013--, scikit-bio development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from functools import partial
+from unittest import TestCase, main
+
+import numpy as np
+import numpy.testing as npt
+import pandas as pd
+from pandas.util.testing import assert_series_equal
+from scipy.stats import f_oneway
+import hdmedians as hd
+
+from skbio import DistanceMatrix
+from skbio.stats.ordination import pcoa
+from skbio.stats.distance import permdisp
+from skbio.stats.distance._permdisp import _compute_groups
+from skbio.util import get_data_path
+
+
+class testPERMDISP(TestCase):
+    """Tests for ``permdisp`` and its helper ``_compute_groups``."""
+
+    def setUp(self):
+        """Build the groupings and distance matrices shared by the tests."""
+        # test with 2 groups of equal size
+        # when assigned different labels, results should be the same
+        self.grouping_eq = ['foo', 'foo', 'foo', 'bar', 'bar', 'bar']
+        self.grouping_eq_relab = ['pyt', 'pyt', 'pyt', 'hon', 'hon', 'hon']
+        self.exp_index = ['method name', 'test statistic name', 'sample size',
+                          'number of groups', 'test statistic', 'p-value',
+                          'number of permutations']
+        # test with 3 groups of different sizes
+        # when assigned different labels results should be the same
+        self.grouping_uneq = ['foo', 'foo', 'bar', 'bar', 'bar',
+                              'qw', 'qw', 'qw', 'qw']
+
+        self.grouping_uneq_relab = [12, 12, 7, 7, 7, 23, 23, 23, 23]
+
+        # same partition again, with string and integer labels mixed
+        self.grouping_un_mixed = ['a', 'a', 7, 7, 7, 'b', 'b', 'b', 'b']
+
+        eq_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
+        uneq_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9']
+        # matrix for equal grouping
+        self.eq_mat = DistanceMatrix([[0, 4, 0, 0, 4, 2],
+                                      [4, 0, 2, 0, 3, 1],
+                                      [0, 2, 0, 5, 2, 5],
+                                      [0, 0, 5, 0, 0, 2],
+                                      [4, 3, 2, 0, 0, 2],
+                                      [2, 1, 5, 2, 2, 0]], eq_ids)
+
+        # matrix for unequal grouping
+        self.uneq_mat = DistanceMatrix([[0, 0, 4, 0, 0, 3, 5, 3, 0],
+                                        [0, 0, 0, 3, 4, 5, 3, 0, 3],
+                                        [4, 0, 0, 4, 3, 1, 0, 5, 2],
+                                        [0, 3, 4, 0, 0, 2, 1, 3, 5],
+                                        [0, 4, 3, 0, 0, 1, 1, 5, 0],
+                                        [3, 5, 1, 2, 1, 0, 2, 0, 5],
+                                        [5, 3, 0, 1, 1, 2, 0, 4, 3],
+                                        [3, 0, 5, 3, 5, 0, 4, 0, 4],
+                                        [0, 3, 2, 5, 0, 5, 3, 4, 0]], uneq_ids)
+
+        # null matrix for equal grouping
+        self.null_mat = DistanceMatrix([[0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0],
+                                        [0, 0, 0, 0, 0, 0]], eq_ids)
+
+        unif_ids = ['PC.354', 'PC.355', 'PC.356', 'PC.481', 'PC.593', 'PC.607',
+                    'PC.634', 'PC.635', 'PC.636']
+
+        self.unifrac_dm = DistanceMatrix(
+            [[0.0, 0.595483768391, 0.618074717633, 0.582763100909,
+              0.566949022108, 0.714717232268, 0.772001731764, 0.690237118413,
+              0.740681707488],
+             [0.595483768391, 0.0, 0.581427669668, 0.613726772383,
+              0.65945132763, 0.745176523638, 0.733836123821, 0.720305073505,
+              0.680785600439],
+             [0.618074717633, 0.581427669668, 0.0, 0.672149021573,
+              0.699416863323, 0.71405573754, 0.759178215168, 0.689701276341,
+              0.725100672826],
+             [0.582763100909, 0.613726772383, 0.672149021573, 0.0,
+              0.64756120797, 0.666018240373, 0.66532968784, 0.650464714994,
+              0.632524644216],
+             [0.566949022108, 0.65945132763, 0.699416863323, 0.64756120797,
+              0.0, 0.703720200713, 0.748240937349, 0.73416971958,
+              0.727154987937],
+             [0.714717232268, 0.745176523638, 0.71405573754, 0.666018240373,
+              0.703720200713, 0.0, 0.707316869557, 0.636288883818,
+              0.699880573956],
+             [0.772001731764, 0.733836123821, 0.759178215168, 0.66532968784,
+              0.748240937349, 0.707316869557, 0.0, 0.565875193399,
+              0.560605525642],
+             [0.690237118413, 0.720305073505, 0.689701276341, 0.650464714994,
+              0.73416971958, 0.636288883818, 0.565875193399, 0.0,
+              0.575788039321],
+             [0.740681707488, 0.680785600439, 0.725100672826, 0.632524644216,
+              0.727154987937, 0.699880573956, 0.560605525642, 0.575788039321,
+              0.0]], unif_ids)
+
+        self.unif_grouping = ['Control', 'Control', 'Control', 'Control',
+                              'Control', 'Fast', 'Fast', 'Fast', 'Fast']
+
+        self.assert_series_equal = partial(assert_series_equal,
+                                           check_index_type=True,
+                                           check_series_type=True)
+
+    def test_centroids_eq_groups(self):
+        """Centroid statistic for two equal-sized groups; labels are free."""
+        exp = [[1.2886811963240687, 1.890538910062923, 1.490527658097728],
+               [2.17349240061718, 2.3192679626679946, 2.028338553903792]]
+        exp_stat, _ = f_oneway(*exp)
+
+        dm = pcoa(self.eq_mat)
+        dm = dm.samples
+
+        obs = _compute_groups(dm, 'centroid', self.grouping_eq)
+        self.assertAlmostEqual(obs, exp_stat, places=6)
+
+        obs_relab = _compute_groups(dm, 'centroid', self.grouping_eq_relab)
+        self.assertAlmostEqual(obs_relab, obs, places=6)
+
+    def test_centroids_uneq_groups(self):
+        """Centroid statistic for three groups of different sizes.
+
+        The expected per-group distances were calculated by hand.
+        """
+        exp = [[2.5847022428144935, 2.285624595858895,
+                1.7022431146340287],
+               [1.724817266046108, 1.724817266046108],
+               [2.4333280644972795, 2.389000390879655,
+                2.8547180589306036, 3.218568759338847]]
+        exp_stat, _ = f_oneway(*exp)
+
+        dm = pcoa(self.uneq_mat)
+        dm = dm.samples
+
+        obs = _compute_groups(dm, 'centroid', self.grouping_uneq)
+        self.assertAlmostEqual(obs, exp_stat, places=6)
+
+        obs_relab = _compute_groups(dm, 'centroid', self.grouping_uneq_relab)
+        self.assertAlmostEqual(obs, obs_relab, places=6)
+
+    def test_centroids_mixedgroups(self):
+        """Mixed string/integer labels give the same centroid statistic."""
+        exp = [[2.5847022428144935, 2.285624595858895,
+                1.7022431146340287],
+               [1.724817266046108, 1.724817266046108],
+               [2.4333280644972795, 2.389000390879655,
+                2.8547180589306036, 3.218568759338847]]
+        dm = pcoa(self.uneq_mat)
+        dm = dm.samples
+
+        exp_stat, _ = f_oneway(*exp)
+
+        obs_mixed = _compute_groups(dm, 'centroid', self.grouping_un_mixed)
+        self.assertAlmostEqual(exp_stat, obs_mixed, places=6)
+
+    def test_centroids_null(self):
+        """An all-zero distance matrix yields a NaN statistic."""
+        dm = pcoa(self.null_mat)
+        dm = dm.samples
+
+        obs_null = _compute_groups(dm, 'centroid', self.grouping_eq)
+        # A bare np.isnan() call checks nothing; the result must be asserted.
+        self.assertTrue(np.isnan(obs_null))
+
+    def test_centroid_normal(self):
+        """Full ``permdisp`` run with test='centroid' on the UniFrac data."""
+        exp = pd.Series(index=self.exp_index,
+                        data=['PERMDISP', 'F-value', 9, 2, 0.244501519876,
+                              0.63, 99],
+                        name='PERMDISP results')
+
+        # fixed seed so the permutation-based p-value is reproducible
+        np.random.seed(0)
+        obs = permdisp(self.unifrac_dm, self.unif_grouping, test='centroid',
+                       permutations=99)
+
+        self.assert_series_equal(obs, exp)
+
+    def test_median_normal(self):
+        """Full ``permdisp`` run with test='median' on the UniFrac data."""
+        exp = pd.Series(index=self.exp_index,
+                        data=['PERMDISP', 'F-value', 9, 2, 0.139475441876,
+                              0.61, 99],
+                        name='PERMDISP results')
+
+        # fixed seed so the permutation-based p-value is reproducible
+        np.random.seed(0)
+        obs = permdisp(self.unifrac_dm, self.unif_grouping, test='median',
+                       permutations=99)
+
+        self.assert_series_equal(obs, exp)
+
+    def test_not_distance_matrix(self):
+        """A plain list instead of a DistanceMatrix raises TypeError."""
+        dm = []
+
+        npt.assert_raises(TypeError, permdisp, dm, self.unif_grouping,
+                          permutations=0)
+
+    def test_mismatched_group(self):
+        """A grouping shorter than the matrix raises ValueError."""
+        gr = ['foo', 'bar']
+        npt.assert_raises(ValueError, permdisp, self.unifrac_dm, gr)
+
+    def test_single_group(self):
+        """A grouping with only one distinct label raises ValueError."""
+        gr = ['f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f']
+        npt.assert_raises(ValueError, permdisp, self.unifrac_dm, gr)
+
+    def test_no_permuations(self):
+        """With permutations=0 the reported p-value is NaN."""
+        obs = permdisp(self.eq_mat, self.grouping_eq, permutations=0)
+
+        # A bare np.isnan() call checks nothing; the result must be asserted.
+        self.assertTrue(np.isnan(obs['p-value']))
+
+    def test_hdmedians(self):
+        """Pin the output of ``hd.geomedian`` on a known matrix."""
+        exp = np.array([2.01956244, 1.53164546, 2.60571752, 0.91424179,
+                        1.76214416, 1.69943057])
+        obs = np.array(hd.geomedian(self.eq_mat.data))
+        npt.assert_almost_equal(obs, exp, decimal=6)
+
+    def test_confirm_betadispr_results(self):
+        """Compare results on the moving-pictures data with stored values."""
+        mp_dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv'))
+        mp_mf = pd.read_csv(get_data_path('moving_pictures_mf.tsv'), sep='\t')
+        mp_mf.set_index('#SampleID', inplace=True)
+
+        obs_med_mp = permdisp(mp_dm, mp_mf,
+                              column='BodySite')
+        obs_cen_mp = permdisp(mp_dm, mp_mf, column='BodySite',
+                              test='centroid')
+
+        exp_data_m = ['PERMDISP', 'F-value', 33, 4, 10.1956, 0.001, 999]
+        exp_data_c = ['PERMDISP', 'F-value', 33, 4, 17.4242, 0.001, 999]
+
+        exp_med_mp = pd.Series(data=exp_data_m, index=self.exp_index,
+                               dtype='object', name='PERMDISP results')
+
+        exp_cen_mp = pd.Series(data=exp_data_c, index=self.exp_index,
+                               dtype='object', name='PERMDISP results')
+
+        self.assert_series_equal(exp_med_mp, obs_med_mp)
+
+        self.assert_series_equal(exp_cen_mp, obs_cen_mp)
+
+
+# Run the tests when this module is executed directly as a script.
+if __name__ == '__main__':
+ main()
=====================================
skbio/stats/ordination/__init__.py
=====================================
--- a/skbio/stats/ordination/__init__.py
+++ b/skbio/stats/ordination/__init__.py
@@ -17,6 +17,7 @@ Functions
ca
pcoa
+ pcoa_biplot
cca
rda
mean_and_std
@@ -126,11 +127,11 @@ from skbio.util import TestRunner
from ._redundancy_analysis import rda
from ._correspondence_analysis import ca
from ._canonical_correspondence_analysis import cca
-from ._principal_coordinate_analysis import pcoa
+from ._principal_coordinate_analysis import pcoa, pcoa_biplot
from ._ordination_results import OrdinationResults
from ._utils import (mean_and_std, scale, svd_rank, corr, e_matrix, f_matrix)
-__all__ = ['ca', 'rda', 'cca', 'pcoa', 'OrdinationResults',
+__all__ = ['ca', 'rda', 'cca', 'pcoa', 'pcoa_biplot', 'OrdinationResults',
'mean_and_std', 'scale', 'svd_rank', 'corr',
'e_matrix', 'f_matrix']
=====================================
skbio/stats/ordination/_canonical_correspondence_analysis.py
=====================================
--- a/skbio/stats/ordination/_canonical_correspondence_analysis.py
+++ b/skbio/stats/ordination/_canonical_correspondence_analysis.py
@@ -44,7 +44,7 @@ def cca(y, x, scaling=1):
Samples by constraints table (n, q)
scaling : int, {1, 2}, optional
Scaling type 1 maintains :math:`\chi^2` distances between rows.
- Scaling type 2 preserver :math:`\chi^2` distances between columns.
+ Scaling type 2 preserves :math:`\chi^2` distances between columns.
For a more detailed explanation of the interpretation, check Legendre &
Legendre 1998, section 9.4.3.
=====================================
skbio/stats/ordination/_principal_coordinate_analysis.py
=====================================
--- a/skbio/stats/ordination/_principal_coordinate_analysis.py
+++ b/skbio/stats/ordination/_principal_coordinate_analysis.py
@@ -15,7 +15,7 @@ from scipy.linalg import eigh
from skbio.stats.distance import DistanceMatrix
from skbio.util._decorator import experimental
from ._ordination_results import OrdinationResults
-from ._utils import e_matrix, f_matrix
+from ._utils import e_matrix, f_matrix, scale
# - In cogent, after computing eigenvalues/vectors, the imaginary part
# is dropped, if any. We know for a fact that the eigenvalues are
@@ -142,3 +142,64 @@ def pcoa(distance_matrix):
columns=axis_labels),
proportion_explained=pd.Series(proportion_explained,
index=axis_labels))
+
+
+@experimental(as_of="0.5.3")
+def pcoa_biplot(ordination, y):
+ """Compute the projection of descriptors into a PCoA matrix
+
+ This implementation is as described in Chapter 9 of Legendre & Legendre,
+ Numerical Ecology 3rd edition.
+
+ Parameters
+ ----------
+ ordination: OrdinationResults
+ The computed principal coordinates analysis of dimensions (n, c) onto
+ which the matrix ``y`` will be projected.
+ y: DataFrame
+ Samples by features table of dimensions (n, m). These can be
+ environmental features or abundance counts. This table should be
+ normalized in cases of dimensionally heterogeneous physical variables.
+
+ Returns
+ -------
+ OrdinationResults
+ The modified input object that includes projected features onto the
+ ordination space in the ``features`` attribute.
+ """
+
+ # acknowledge that most saved ordinations lack a name, however if they have
+ # a name, it should be PCoA
+ if (ordination.short_method_name != '' and
+ ordination.short_method_name != 'PCoA'):
+ raise ValueError('This biplot computation can only be performed in a '
+ 'PCoA matrix.')
+
+ if set(y.index) != set(ordination.samples.index):
+ raise ValueError('The eigenvectors and the descriptors must describe '
+ 'the same samples.')
+
+ eigvals = ordination.eigvals
+ coordinates = ordination.samples
+ N = coordinates.shape[0]
+
+ # align the descriptors and eigenvectors in a sample-wise fashion
+ y = y.reindex(coordinates.index)
+
+ # S_pc from equation 9.44
+ # Represents the covariance matrix between the features matrix and the
+ # column-centered eigenvectors of the pcoa.
+ spc = (1 / (N - 1)) * y.values.T.dot(scale(coordinates, ddof=1))
+
+ # U_proj from equation 9.55, is the matrix of descriptors to be projected.
+ #
+ # Only get the power of non-zero values, otherwise this will raise a
+ # divide by zero warning. There shouldn't be negative eigenvalues(?)
+ Uproj = np.sqrt(N - 1) * spc.dot(np.diag(np.power(eigvals, -0.5,
+ where=eigvals > 0)))
+
+ ordination.features = pd.DataFrame(data=Uproj,
+ index=y.columns.copy(),
+ columns=coordinates.columns.copy())
+
+ return ordination
=====================================
skbio/stats/ordination/tests/data/PCoA_biplot_descriptors
=====================================
--- /dev/null
+++ b/skbio/stats/ordination/tests/data/PCoA_biplot_descriptors
@@ -0,0 +1,9 @@
+Taxon PC.636 PC.635 PC.356 PC.481 PC.354 PC.593 PC.355 PC.607 PC.634
+Root;k__Bacteria;Other 0.0202702702703 0.0469798657718 0.0266666666667 0.027397260274 0.0134228187919 0.0134228187919 0.0136054421769 0.0469798657718 0.02
+Root;k__Bacteria;p__Actinobacteria 0.00675675675676 0.00671140939597 0.0 0.00684931506849 0.0 0.0 0.0 0.0201342281879 0.02
+Root;k__Bacteria;p__Bacteroidetes 0.695945945946 0.523489932886 0.193333333333 0.143835616438 0.0738255033557 0.389261744966 0.285714285714 0.275167785235 0.653333333333
+Root;k__Bacteria;p__Deferribacteres 0.0472972972973 0.0134228187919 0.0 0.0 0.0 0.0 0.0 0.0201342281879 0.0333333333333
+Root;k__Bacteria;p__Firmicutes 0.209459459459 0.395973154362 0.773333333333 0.787671232877 0.89932885906 0.41610738255 0.700680272109 0.456375838926 0.22
+Root;k__Bacteria;p__Proteobacteria 0.00675675675676 0.00671140939597 0.0 0.0 0.0 0.0335570469799 0.0 0.0201342281879 0.0133333333333
+Root;k__Bacteria;p__TM7 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0133333333333
+Root;k__Bacteria;p__ 0.0135135135135 0.00671140939597 0.00666666666667 0.0342465753425 0.0134228187919 0.147651006711 0.0 0.161073825503 0.0266666666667
=====================================
skbio/stats/ordination/tests/data/PCoA_biplot_projected_descriptors
=====================================
--- /dev/null
+++ b/skbio/stats/ordination/tests/data/PCoA_biplot_projected_descriptors
@@ -0,0 +1,9 @@
+"","Axis.1","Axis.2","Axis.3","Axis.4","Axis.5","Axis.6","Axis.7","Axis.8"
+"Root;k__Bacteria;Other",-0.0251703458354009,-0.0387419997798285,0.0370307944752097,-0.0182485866183309,-0.00129741030252842,0.000814106793625313,-0.0173701207829898,0.0361211055519348
+"Root;k__Bacteria;p__Actinobacteria",-0.0243896442435075,-0.0173750855733946,0.00412122918478027,-0.00122921164491863,-0.0133154056835269,-0.0154481346200809,0.0110025326657988,-0.0193889181281414
+"Root;k__Bacteria;p__Bacteroidetes",-0.715120229604841,0.317840715450945,-0.183247320427738,0.616597812375811,0.261720590975174,-0.0233954969961052,0.0235684811385711,0.108576064612194
+"Root;k__Bacteria;p__Deferribacteres",-0.0581057801759818,0.0109676872979955,0.000973434160884227,0.0403394413149601,-0.0126657203131676,0.0312522995897358,0.00445295277091873,-0.0391828157435708
+"Root;k__Bacteria;p__Firmicutes",0.826514889978526,0.0338669291769973,0.258683679610306,-0.820041925817023,-0.236331008329088,0.0820371240664409,-0.146993916645359,-0.114613637622136
+"Root;k__Bacteria;p__Proteobacteria",-0.00665357958782869,-0.0351957821869525,-0.0269691957766513,0.0384070851104123,0.019915981256556,-0.0152225877254676,0.019285238587548,0.0101464789573946
+"Root;k__Bacteria;p__TM7",-0.00936607225354557,0.00504668520861811,-0.00329562583232389,-0.00189853964818609,0.00398022745872819,-0.0160400884328152,0.0128760393792986,-0.0162949930598132
+"Root;k__Bacteria;p__",0.0122907617222583,-0.276409149593715,-0.0872969953941216,0.146073924926225,-0.022007255063595,-0.0439972226754927,0.0931787928855206,0.0346367154320729
=====================================
skbio/stats/ordination/tests/data/PCoA_skbio
=====================================
--- /dev/null
+++ b/skbio/stats/ordination/tests/data/PCoA_skbio
@@ -0,0 +1,22 @@
+Eigvals 9
+0.5123672604605048 0.30071909442702155 0.2679120660041405 0.20898868107836532 0.19169895326008157 0.16054234528018121 0.1501769571197782 0.12245774816668883 0.0
+
+Proportion explained 9
+0.2675738327765797 0.1570446960499008 0.1399118637740237 0.109140272453788 0.10011104850250738 0.08384011619116792 0.07842699390110737 0.06395117635092507 0.0
+
+Species 0 0
+
+Site 9 9
+PC.636 -0.25846546118284214 0.17399954688273864 -0.03828757925519388 0.1944775056196811 -0.08311760208440848 0.2624303332005185 -0.023163639223487725 0.018479403958060633 -0.0
+PC.635 -0.27100113539100923 -0.01859513190633933 0.08648419263485602 -0.11806424531525102 0.19880835843691802 -0.021172359953453967 -0.19102402756537798 -0.1556465923768283 -0.0
+PC.356 0.2350778981747308 0.0962519254488697 0.3457927267138699 0.003208625776186833 0.09637776755188007 0.04570253869527276 0.1854728132858981 -0.04040939717932225 -0.0
+PC.481 0.026140766432533637 -0.011145967653319279 -0.14766060301460832 -0.29087660852960373 -0.20394547280060757 0.06197123847575147 0.10164132870879576 -0.1056909987185146 -0.0
+PC.354 0.2850075522831214 -0.019254988848331458 -0.062326337538532166 -0.13812679985198154 0.10479860242276298 0.09517207306283292 -0.12963609754202599 0.22068717037162142 -0.0
+PC.593 0.20463632624145503 -0.13936115093164061 -0.2915138196228669 0.18156678682059035 0.1595801327151684 -0.024641213016179383 0.08662524044412884 -0.09962214768709698 -0.0
+PC.355 0.23348240321199026 0.22525797406849948 0.018862309626814986 0.10772998183109622 -0.1771089995718839 -0.19290583515124646 -0.14981947140833857 -0.03835490374645002 -0.0
+PC.607 -0.09496319113225948 -0.42097480249530345 0.1548694548694144 0.08984275092805967 -0.15261819448811528 -0.0334232691500879 -0.02512247773026579 0.05089885364091516 -0.0
+PC.634 -0.35991515863772167 0.11382259543482588 -0.06622034441375366 -0.029757997278778266 0.05722540781828547 -0.1931335061634077 0.14502633103067314 0.14965861173761527 -0.0
+
+Biplot 0 0
+
+Site constraints 0 0
=====================================
skbio/stats/ordination/tests/test_principal_coordinate_analysis.py
=====================================
--- a/skbio/stats/ordination/tests/test_principal_coordinate_analysis.py
+++ b/skbio/stats/ordination/tests/test_principal_coordinate_analysis.py
@@ -9,11 +9,12 @@
import pandas as pd
import numpy as np
import numpy.testing as npt
+from copy import deepcopy
from unittest import TestCase, main
from skbio import DistanceMatrix, OrdinationResults
from skbio.stats.distance import DissimilarityMatrixError
-from skbio.stats.ordination import pcoa
+from skbio.stats.ordination import pcoa, pcoa_biplot
from skbio.util import get_data_path, assert_ordination_results_equal
@@ -126,5 +127,106 @@ class TestPCoA(TestCase):
pcoa([[1, 2], [3, 4]])
+class TestPCoABiplot(TestCase):
+ def setUp(self):
+ # Crawford dataset for unweighted UniFrac
+ fp = get_data_path('PCoA_sample_data_3')
+ self.ordination = pcoa(DistanceMatrix.read(fp))
+
+ fp = get_data_path('PCoA_biplot_descriptors')
+ self.descriptors = pd.read_table(fp, index_col='Taxon').T
+
+ def test_pcoa_biplot_from_ape(self):
+ """Test against a reference implementation from R's ape package
+
+ The test data was generated with the R script below and using a
+ modified version of pcoa.biplot that returns the U matrix.
+
+ library(ape)
+ # files can be found in the test data folder of the ordination module
+ y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
+ dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)
+
+ h = pcoa(dm)
+
+ # biplot.pcoa will only calculate the biplot for two axes at a time
+ acc = NULL
+ for (axes in c(1, 3, 5, 7)) {
+ new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
+ rn = rep('.', length(colnames(dm))) )
+
+ if(is.null(acc)) {
+ acc = new
+ }
+ else {
+ b = acc
+ acc <- cbind(acc, new)
+ }
+ }
+ write.csv(acc, file='PCoA_biplot_projected_descriptors')
+ """
+ obs = pcoa_biplot(self.ordination, self.descriptors)
+
+ # we'll build a dummy ordination results object by copying the observed
+ # one; the main thing we'll modify and compare is the features dataframe
+ exp = deepcopy(obs)
+
+ fp = get_data_path('PCoA_biplot_projected_descriptors')
+ # R won't calculate the last dimension, so pad with zeros to make the
+ # arrays comparable
+ exp.features = pd.read_table(fp, sep=',', index_col=0)
+ exp.features['Axis.9'] = np.zeros_like(exp.features['Axis.8'])
+
+ # make the order comparable
+ exp.features = exp.features.reindex(obs.features.index)
+
+ assert_ordination_results_equal(obs, exp, ignore_directionality=True,
+ ignore_axis_labels=True)
+
+ def test_pcoa_biplot_subset_input(self):
+ # create a 2D copy of the full ordination
+ two_dims = deepcopy(self.ordination)
+ two_dims.eigvals = two_dims.eigvals[:2]
+ two_dims.samples = two_dims.samples.iloc[:, :2]
+ two_dims.proportion_explained = two_dims.proportion_explained[:2]
+
+ # only look at the features
+ subset = pcoa_biplot(two_dims, self.descriptors).features
+ full = pcoa_biplot(self.ordination, self.descriptors).features
+
+ # the biplot should be identical regardless of the number of axes used
+ pd.util.testing.assert_almost_equal(subset, full.iloc[:, :2])
+
+ def test_mismatching_samples(self):
+ new_index = self.descriptors.index.tolist()
+ new_index[3] = 'Not.an.id'
+ self.descriptors.index = pd.Index(new_index)
+
+ with self.assertRaisesRegex(ValueError, 'The eigenvectors and the '
+ 'descriptors must describe the same '
+ 'samples.'):
+ pcoa_biplot(self.ordination, self.descriptors)
+
+ def test_not_a_pcoa(self):
+ self.ordination.short_method_name = 'RDA'
+ self.ordination.long_method_name = 'Redundancy Analysis'
+ with self.assertRaisesRegex(ValueError, 'This biplot computation can'
+ ' only be performed in a PCoA matrix.'):
+ pcoa_biplot(self.ordination, self.descriptors)
+
+ def test_from_seralized_results(self):
+ # the current implementation of ordination results loses some
+ # information, test that pcoa_biplot works fine regardless
+ results = OrdinationResults.read(get_data_path('PCoA_skbio'))
+
+ serialized = pcoa_biplot(results, self.descriptors)
+ in_memory = pcoa_biplot(self.ordination, self.descriptors)
+
+ assert_ordination_results_equal(serialized, in_memory,
+ ignore_directionality=True,
+ ignore_axis_labels=True,
+ ignore_method_names=True)
+
+
if __name__ == "__main__":
main()
=====================================
skbio/tree/_tree.py
=====================================
--- a/skbio/tree/_tree.py
+++ b/skbio/tree/_tree.py
@@ -3098,3 +3098,118 @@ class TreeNode(SkbioObject):
yield self
counter += 1
+
+ @experimental(as_of="0.5.3")
+ def support(self):
+ """Return support value of a node if available.
+
+ Returns
+ -------
+ float or None
+ support value of the node, or None if not available
+
+ Notes
+ -----
+ A "support value" is defined as the numeric form of a whole node label
+ without ":", or the part preceding the first ":" in the node label.
+
+ For example: "(a,b)1.0", "(a,b)1.0:2.5", and "(a,b)'1.0:species_A'".
+ In these cases the support values are all 1.0.
+
+ For example: "(a,b):1.0" and "(a,b)species_A". In these cases there
+ are no support values.
+
+ Examples
+ --------
+ >>> from skbio import TreeNode
+ >>> tree = TreeNode.read(['((a,b)99,(c,d):1.0);'])
+ >>> tree.lca(['a', 'b']).support()
+ 99.0
+ >>> tree.lca(['c', 'd']).support() is None
+ True
+ """
+ support = None
+ if self.name is not None:
+ left, _, _ = self.name.partition(':')
+ try:
+ support = float(left)
+ except ValueError:
+ pass
+ return support
+
+ @experimental(as_of="0.5.3")
+ def unpack(self):
+ """Unpack an internal node in place.
+
+ Notes
+ -----
+ This function sequentially: 1) elongates child nodes by branch length
+ of self (omit if there is no branch length), 2) removes self from
+ parent node, and 3) grafts child nodes to parent node.
+
+ Raises
+ ------
+ TreeError
+ if input node is root or tip
+
+ See also
+ --------
+ unpack_by_func
+ prune
+
+ Examples
+ --------
+ >>> from skbio import TreeNode
+ >>> tree = TreeNode.read(['((c:2.0,d:3.0)a:1.0,(e:2.0,f:1.0)b:2.0);'])
+ >>> tree.find('b').unpack()
+ >>> print(tree)
+ ((c:2.0,d:3.0)a:1.0,e:4.0,f:3.0);
+ <BLANKLINE>
+ """
+ if self.is_root():
+ raise TreeError('Cannot unpack root.')
+ if self.is_tip():
+ raise TreeError('Cannot unpack tip.')
+ parent = self.parent
+ blen = (self.length or 0.0)
+ for child in self.children:
+ clen = (child.length or 0.0)
+ child.length = (clen + blen or None)
+ parent.remove(self)
+ parent.extend(self.children)
+
+ @experimental(as_of="0.5.3")
+ def unpack_by_func(self, func):
+ """Unpack internal nodes of a tree that meet certain criteria.
+
+ Parameters
+ ----------
+ func : function
+ a function that accepts a TreeNode and returns `True` or `False`,
+ where `True` indicates the node is to be unpacked
+
+ See also
+ --------
+ unpack
+ prune
+
+ Examples
+ --------
+ >>> from skbio import TreeNode
+ >>> tree = TreeNode.read(['((c:2,d:3)a:1,(e:1,f:2)b:2);'])
+ >>> tree.unpack_by_func(lambda x: x.length <= 1)
+ >>> print(tree)
+ ((e:1.0,f:2.0)b:2.0,c:3.0,d:4.0);
+ <BLANKLINE>
+ >>> tree = TreeNode.read(['(((a,b)85,(c,d)78)75,(e,(f,g)64)80);'])
+ >>> tree.unpack_by_func(lambda x: x.support() < 75)
+ >>> print(tree)
+ (((a,b)85,(c,d)78)75,(e,f,g)80);
+ <BLANKLINE>
+ """
+ nodes_to_unpack = []
+ for node in self.non_tips(include_self=False):
+ if func(node):
+ nodes_to_unpack.append(node)
+ for node in nodes_to_unpack:
+ node.unpack()
=====================================
skbio/tree/tests/test_tree.py
=====================================
--- a/skbio/tree/tests/test_tree.py
+++ b/skbio/tree/tests/test_tree.py
@@ -1303,6 +1303,135 @@ class TreeTests(TestCase):
with self.assertRaises(MissingNodeError):
next(self.simple_t.shuffle(names=['x', 'y']))
+ def test_support(self):
+ """Get support value of a node."""
+ # test nodes with support alone as label
+ tree = TreeNode.read(['((a,b)75,(c,d)90);'])
+ node1, node2 = tree.children
+ self.assertEqual(node1.support(), 75.0)
+ self.assertEqual(node2.support(), 90.0)
+
+ # test nodes with support and branch length
+ tree = TreeNode.read(['((a,b)0.85:1.23,(c,d)0.95:4.56);'])
+ node1, node2 = tree.children
+ self.assertEqual(node1.support(), 0.85)
+ self.assertEqual(node2.support(), 0.95)
+
+ # test support values that are negative or scientific notation (not a
+ # common scenario but can happen)
+ tree = TreeNode.read(['((a,b)-1.23,(c,d)1.23e-4);'])
+ node1, node2 = tree.children
+ self.assertEqual(node1.support(), -1.23)
+ self.assertEqual(node2.support(), 0.000123)
+
+ # test nodes with support and extra label (not a common scenario but
+ # can happen)
+ tree = TreeNode.read(['((a,b)\'80:X\',(c,d)\'60:Y\');'])
+ node1, node2 = tree.children
+ self.assertEqual(node1.support(), 80.0)
+ self.assertEqual(node2.support(), 60.0)
+
+ # test nodes without label, with non-numeric label, and with branch
+ # length only
+ tree = TreeNode.read(['((a,b),(c,d)x,(e,f):1.0);'])
+ for node in tree.children:
+ self.assertIsNone(node.support())
+
+ def test_unpack(self):
+ """Unpack an internal node."""
+ # test unpacking a node without branch length
+ tree = TreeNode.read(['((c,d)a,(e,f)b);'])
+ tree.find('b').unpack()
+ exp = '((c,d)a,e,f);\n'
+ self.assertEqual(str(tree), exp)
+
+ # test unpacking a node with branch length
+ tree = TreeNode.read(['((c:2.0,d:3.0)a:1.0,(e:2.0,f:1.0)b:2.0);'])
+ tree.find('b').unpack()
+ exp = '((c:2.0,d:3.0)a:1.0,e:4.0,f:3.0);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # test attempting to unpack root
+ tree = TreeNode.read(['((d,e)b,(f,g)c)a;'])
+ msg = 'Cannot unpack root.'
+ with self.assertRaisesRegex(TreeError, msg):
+ tree.find('a').unpack()
+
+ # test attempting to unpack tip
+ msg = 'Cannot unpack tip.'
+ with self.assertRaisesRegex(TreeError, msg):
+ tree.find('d').unpack()
+
+ def test_unpack_by_func(self):
+ """Unpack internal nodes of a tree by a function."""
+ # unpack internal nodes with branch length <= 1.0
+ def func(x):
+ return x.length <= 1.0
+
+ # will unpack node 'a', but not tip 'e'
+ # will add the branch length of 'a' to its child nodes 'c' and 'd'
+ tree = TreeNode.read(['((c:2,d:3)a:1,(e:1,f:2)b:2);'])
+ tree.unpack_by_func(func)
+ exp = '((e:1.0,f:2.0)b:2.0,c:3.0,d:4.0);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # unpack internal nodes with branch length <= 2.0
+ # will unpack both 'a' and 'b'
+ tree = TreeNode.read(['((c:2,d:3)a:1,(e:1,f:2)b:2);'])
+ tree.unpack_by_func(lambda x: x.length <= 2.0)
+ exp = '(c:3.0,d:4.0,e:3.0,f:4.0);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # unpack two nested nodes 'a' and 'c' simultaneously
+ tree = TreeNode.read(['(((e:3,f:2)c:1,d:3)a:1,b:4);'])
+ tree.unpack_by_func(lambda x: x.length <= 2.0)
+ exp = '(b:4.0,d:4.0,e:5.0,f:4.0);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # test a complicated scenario (unpacking nodes 'g', 'h' and 'm')
+ def func(x):
+ return x.length < 2.0
+ tree = TreeNode.read(['(((a:1.04,b:2.32,c:1.44)d:3.20,'
+ '(e:3.91,f:2.47)g:1.21)h:1.75,'
+ '(i:4.14,(j:2.06,k:1.58)l:3.32)m:0.77);'])
+ tree.unpack_by_func(func)
+ exp = ('((a:1.04,b:2.32,c:1.44)d:4.95,e:6.87,f:5.43,i:4.91,'
+ '(j:2.06,k:1.58)l:4.09);')
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # unpack nodes with support < 75
+ def func(x):
+ return x.support() < 75
+ tree = TreeNode.read(['(((a,b)85,(c,d)78)75,(e,(f,g)64)80);'])
+ tree.unpack_by_func(func)
+ exp = '(((a,b)85,(c,d)78)75,(e,f,g)80);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # unpack nodes with support < 85
+ tree = TreeNode.read(['(((a,b)85,(c,d)78)75,(e,(f,g)64)80);'])
+ tree.unpack_by_func(lambda x: x.support() < 85)
+ exp = '((a,b)85,c,d,e,f,g);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # unpack nodes with support < 0.95
+ tree = TreeNode.read(['(((a,b)0.97,(c,d)0.98)1.0,(e,(f,g)0.88)0.96);'])
+ tree.unpack_by_func(lambda x: x.support() < 0.95)
+ exp = '(((a,b)0.97,(c,d)0.98)1.0,(e,f,g)0.96);'
+ self.assertEqual(str(tree).rstrip(), exp)
+
+ # test a case where there are branch lengths, missing support values
+ # and node labels
+ def func(x):
+ sup = x.support()
+ return sup is not None and sup < 75
+ tree = TreeNode.read(['(((a:1.02,b:0.33)85:0.12,(c:0.86,d:2.23)'
+ '70:3.02)75:0.95,(e:1.43,(f:1.69,g:1.92)64:0.20)'
+ 'node:0.35)root;'])
+ tree.unpack_by_func(func)
+ exp = ('(((a:1.02,b:0.33)85:0.12,c:3.88,d:5.25)75:0.95,'
+ '(e:1.43,f:1.89,g:2.12)node:0.35)root;')
+ self.assertEqual(str(tree).rstrip(), exp)
+
sample = """
(
View it on GitLab: https://salsa.debian.org/med-team/python-skbio/compare/ba9bddb165769afd7052c5cbe2737a0fa6a0ab3a...f2418d0e0d131c4475d3f3add2f66bbe4845af84
--
View it on GitLab: https://salsa.debian.org/med-team/python-skbio/compare/ba9bddb165769afd7052c5cbe2737a0fa6a0ab3a...f2418d0e0d131c4475d3f3add2f66bbe4845af84
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180815/aac9fb65/attachment-0001.html>
More information about the debian-med-commit
mailing list