[med-svn] [Git][med-team/q2-diversity-lib][master] 5 commits: New upstream version 2024.5.0

Tue Jun 25 02:51:48 BST 2024


Michael R. Crusoe pushed to branch master at Debian Med / q2-diversity-lib


Commits:
953fd6e9 by Michael R. Crusoe at 2024-06-25T03:35:06+02:00
New upstream version 2024.5.0
- - - - -
2c273fda by Michael R. Crusoe at 2024-06-25T03:35:06+02:00
routine-update: New upstream version

- - - - -
4dbc891a by Michael R. Crusoe at 2024-06-25T03:35:07+02:00
Update upstream source from tag 'upstream/2024.5.0'

Update to upstream version '2024.5.0'
with Debian dir 006b04920c26dd54b1518da12cfee5345cc0c271
- - - - -
3fbc167d by Michael R. Crusoe at 2024-06-25T03:35:24+02:00
routine-update: Regenerate debian/control from debian/control.in

- - - - -
168f4089 by Michael R. Crusoe at 2024-06-25T03:39:33+02:00
d/patches/configparser.patch: copy from qiime packaging

- - - - -


13 changed files:

- debian/changelog
- debian/control
- + debian/patches/configparser.patch
- + debian/patches/series
- q2_diversity_lib/_version.py
- q2_diversity_lib/alpha.py
- q2_diversity_lib/beta.py
- q2_diversity_lib/examples.py
- + q2_diversity_lib/skbio/LICENSE
- + q2_diversity_lib/skbio/__init__.py
- + q2_diversity_lib/skbio/_methods.py
- + q2_diversity_lib/skbio/test_methods.py
- q2_diversity_lib/tests/test_alpha.py


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,12 @@
+q2-diversity-lib (2024.5.0-1) UNRELEASED; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * Regenerate debian/control from debian/control.in (routine-update)
+  * d/patches/configparser.patch: copy from qiime packaging
+
+ -- Michael R. Crusoe <crusoe at debian.org>  Tue, 25 Jun 2024 03:35:06 +0200
+
 q2-diversity-lib (2024.2.0-1) unstable; urgency=medium
 
   * Team upload.


=====================================
debian/control
=====================================
@@ -11,7 +11,7 @@ Build-Depends: debhelper-compat (= 13),
                python3-pytest <!nocheck>,
                python3-statsmodels <!nocheck>,
                python3-unifrac <!nocheck>,
-               qiime (>= 2024.2) <!nocheck>
+               qiime (>= 2024.5) <!nocheck>
 Standards-Version: 4.6.2
 Vcs-Browser: https://salsa.debian.org/med-team/q2-diversity-lib
 Vcs-Git: https://salsa.debian.org/med-team/q2-diversity-lib.git
@@ -25,9 +25,9 @@ Depends: ${shlibs:Depends},
          ${python3:Depends},
          python3-pandas,
          python3-sklearn,
-         qiime (>= 2024.2),
-         q2-emperor (>= 2024.2),
-         q2-feature-table (>= 2024.2),
+         qiime (>= 2024.5),
+         q2-emperor (>= 2024.5),
+         q2-feature-table (>= 2024.5),
          r-base-core,
          r-cran-vegan,
          python3-statsmodels,


=====================================
debian/patches/configparser.patch
=====================================
@@ -0,0 +1,35 @@
+From: Athos Ribeiro <athos.ribeiro at canonical.com>
+Date: Mon, 3 Jun 2024 11:31:54 -0300
+Subject: [PATCH] Use ConfigParser instead of SafeConfigParser
+
+The configparser's SafeConfigParser has been renamed to ConfigParser in
+Python 3.2 [1]. It was finally removed in Python 3.12 [2].
+
+[1] https://docs.python.org/dev/whatsnew/3.2.html#configparser
+[2] https://docs.python.org/3/whatsnew/3.12.html#configparser
+
+Last-Update: 2024-06-03
+Forwarded: not-needed, see https://github.com/qiime2/q2-sample-classifier/pull/229
+---
+ versioneer.py | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/versioneer.py b/versioneer.py
+index a5e7a20..886455f 100644
+--- a/versioneer.py
++++ b/versioneer.py
+@@ -340,9 +340,9 @@ def get_config_from_root(root):
+     # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
+     # the top of versioneer.py for instructions on writing your setup.cfg .
+     setup_cfg = os.path.join(root, "setup.cfg")
+-    parser = configparser.SafeConfigParser()
++    parser = configparser.ConfigParser()
+     with open(setup_cfg, "r") as f:
+-        parser.readfp(f)
++        parser.read_file(f)
+     VCS = parser.get("versioneer", "VCS")  # mandatory
+ 
+     def get(parser, name):
+-- 
+2.43.0
+


=====================================
debian/patches/series
=====================================
@@ -0,0 +1 @@
+configparser.patch


=====================================
q2_diversity_lib/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 2024.2.0, Release-2024.2)"
-    git_full = "ed3f3f3d0cd7f0dd93afe37540dd0833db438d53"
-    git_date = "2024-02-16 21:56:40 +0000"
+    git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+    git_full = "061b6e199fd2464750c8e59fca899fe3353ddb4e"
+    git_date = "2024-05-29 04:14:53 +0000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 


=====================================
q2_diversity_lib/alpha.py
=====================================
@@ -7,9 +7,13 @@
 # ----------------------------------------------------------------------------
 
 import pandas as pd
+import numpy as np
+import functools
+
 import skbio.diversity
+import skbio.diversity.alpha
+
 import biom
-import numpy as np
 
 from q2_types.feature_table import BIOMV210Format
 from q2_types.sample_data import AlphaDiversityFormat
@@ -19,6 +23,11 @@ from ._util import (_validate_tables,
                     _validate_requested_cpus,
                     _omp_cmd_wrapper)
 
+from q2_diversity_lib.skbio._methods import (_berger_parker, _brillouin_d,
+                                             _simpsons_dominance, _esty_ci,
+                                             _goods_coverage, _margalef,
+                                             _mcintosh_d, _strong, _shannon,
+                                             _p_evenness)
 
 METRICS = {
     'PHYLO': {
@@ -91,7 +100,10 @@ def pielou_evenness(table: biom.Table,
 
     results = []
     for v in table.iter_data(dense=True):
-        results.append(_skbio_alpha_diversity_from_1d(v, 'pielou_e'))
+        # using in-house metrics temporarily
+        # results.append(_skbio_alpha_diversity_from_1d(v, 'pielou_e'))
+        v = np.reshape(v, (1, len(v)))
+        results.extend([_p_evenness(c)for c in v])
     results = pd.Series(results, index=table.ids(), name='pielou_evenness')
     return results
 
@@ -104,7 +116,10 @@ def shannon_entropy(table: biom.Table,
 
     results = []
     for v in table.iter_data(dense=True):
-        results.append(_skbio_alpha_diversity_from_1d(v, 'shannon'))
+        # using in-house metrics temporarily
+        # results.append(_skbio_alpha_diversity_from_1d(v, 'shannon'))
+        v = np.reshape(v, (1, len(v)))
+        results.extend([_shannon(c)for c in v])
     results = pd.Series(results, index=table.ids(), name='shannon_entropy')
     return results
 
@@ -112,8 +127,23 @@ def shannon_entropy(table: biom.Table,
 @_validate_tables
 def alpha_passthrough(table: biom.Table, metric: str) -> pd.Series:
     results = []
-
-    for v in table.iter_data(dense=True):
-        results.append(_skbio_alpha_diversity_from_1d(v.astype(int), metric))
+    method_map = {"berger_parker_d": _berger_parker,
+                  "brillouin_d": _brillouin_d,
+                  "simpson": _simpsons_dominance,
+                  "esty_ci": _esty_ci,
+                  "goods_coverage": _goods_coverage,
+                  "margalef": _margalef,
+                  "mcintosh_d": _mcintosh_d,
+                  "strong": _strong}
+
+    if metric in method_map:
+        metric = functools.partial(method_map[metric])
+        for v in table.iter_data(dense=True):
+            v = np.reshape(v, (1, len(v)))
+            results.extend([metric(c)for c in v])
+    else:
+        for v in table.iter_data(dense=True):
+            results.append(_skbio_alpha_diversity_from_1d(v.astype(int),
+                                                          metric))
     results = pd.Series(results, index=table.ids(), name=metric)
     return results


=====================================
q2_diversity_lib/beta.py
=====================================
@@ -38,7 +38,7 @@ METRICS = {
         'IMPL': {'braycurtis', 'jaccard'},
         'UNIMPL': {'cityblock', 'euclidean', 'seuclidean', 'sqeuclidean',
                    'cosine', 'correlation', 'hamming', 'chebyshev', 'canberra',
-                   'yule', 'matching', 'dice', 'kulsinski',
+                   'yule', 'matching', 'dice',
                    'rogerstanimoto', 'russellrao', 'sokalmichener',
                    'sokalsneath', 'minkowski', 'aitchison', 'canberra_adkins',
                    'jensenshannon'}


=====================================
q2_diversity_lib/examples.py
=====================================
@@ -322,8 +322,8 @@ def beta_passthrough_n_jobs_example(use):
     result, = use.action(
         use.UsageAction(plugin_id='diversity_lib',
                         action_id='beta_passthrough'),
-        use.UsageInputs(table=ft, metric='kulsinski', n_jobs=1),
-        use.UsageOutputNames(distance_matrix='kulsinski_dm')
+        use.UsageInputs(table=ft, metric='euclidean', n_jobs=1),
+        use.UsageOutputNames(distance_matrix='euclidean_dm')
     )
     result.assert_output_type('DistanceMatrix')
 


=====================================
q2_diversity_lib/skbio/LICENSE
=====================================
@@ -0,0 +1,29 @@
+# sourced from https://github.com/scikit-bio/scikit-bio/blob/main/LICENSE.txt
+
+Copyright (c) 2013--, scikit-bio development team.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+* Neither the names scikit-bio, skbio, or biocore nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


=====================================
q2_diversity_lib/skbio/__init__.py
=====================================
@@ -0,0 +1,7 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2013--, scikit-bio development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------


=====================================
q2_diversity_lib/skbio/_methods.py
=====================================
@@ -0,0 +1,79 @@
+import numpy as np
+
+from skbio.diversity._util import _validate_counts_vector
+import skbio.diversity.alpha
+
+from scipy.special import gammaln
+
+
+# methods copied and pasted from skbio
+def _berger_parker(counts):
+    counts = _validate_counts_vector(counts)
+    return counts.max() / counts.sum()
+
+
+def _brillouin_d(counts):
+    counts = _validate_counts_vector(counts)
+    nz = counts[counts.nonzero()]
+    n = nz.sum()
+    return (gammaln(n + 1) - gammaln(nz + 1).sum()) / n
+
+
+def _simpsons_dominance(counts):
+    counts = _validate_counts_vector(counts)
+    return 1 - skbio.diversity.alpha.dominance(counts)
+
+
+def _esty_ci(counts):
+    counts = _validate_counts_vector(counts)
+
+    f1 = skbio.diversity.alpha.singles(counts)
+    f2 = skbio.diversity.alpha.doubles(counts)
+    n = counts.sum()
+    z = 1.959963985
+    W = (f1 * (n - f1) + 2 * n * f2) / (n ** 3)
+
+    return f1 / n - z * np.sqrt(W), f1 / n + z * np.sqrt(W)
+
+
+def _goods_coverage(counts):
+    counts = _validate_counts_vector(counts)
+    f1 = skbio.diversity.alpha.singles(counts)
+    N = counts.sum()
+    return 1 - (f1 / N)
+
+
+def _margalef(counts):
+    counts = _validate_counts_vector(counts)
+    # replaced the observed_otu call with sobs
+    return (skbio.diversity.alpha.sobs(counts) - 1) / np.log(counts.sum())
+
+
+def _mcintosh_d(counts):
+    counts = _validate_counts_vector(counts)
+    u = np.sqrt((counts * counts).sum())
+    n = counts.sum()
+    return (n - u) / (n - np.sqrt(n))
+
+
+def _strong(counts):
+    counts = _validate_counts_vector(counts)
+    n = counts.sum()
+    # replaced the observed_otu call with sobs
+    s = skbio.diversity.alpha.sobs(counts)
+    i = np.arange(1, len(counts) + 1)
+    sorted_sum = np.sort(counts)[::-1].cumsum()
+    return (sorted_sum / n - (i / s)).max()
+
+
+def _p_evenness(counts):
+    counts = _validate_counts_vector(counts)
+    return _shannon(counts, base=np.e) / np.log(
+        skbio.diversity.alpha.sobs(counts=counts))
+
+
+def _shannon(counts, base=2):
+    counts = _validate_counts_vector(counts)
+    freqs = counts / counts.sum()
+    nonzero_freqs = freqs[freqs.nonzero()]
+    return -(nonzero_freqs * np.log(nonzero_freqs)).sum() / np.log(base)


=====================================
q2_diversity_lib/skbio/test_methods.py
=====================================
@@ -0,0 +1,76 @@
+import numpy as np
+import numpy.testing as npt
+
+from qiime2.plugin.testing import TestPluginBase
+
+from q2_diversity_lib.skbio._methods import (_berger_parker, _brillouin_d,
+                                             _simpsons_dominance, _esty_ci,
+                                             _goods_coverage, _margalef,
+                                             _mcintosh_d, _strong)
+
+
+class SkbioTests(TestPluginBase):
+    package = 'q2_diversity_lib.skbio'
+
+# tests for passthrough metrics were sourced from skbio
+    def test_berger_parker_d(self):
+        self.assertEqual(_berger_parker(np.array([5, 5])), 0.5)
+        self.assertEqual(_berger_parker(np.array([1, 1, 1, 1, 0])), 0.25)
+
+    def test_brillouin_d(self):
+        self.assertAlmostEqual(_brillouin_d(np.array([1, 2, 0, 0, 3, 1])),
+                               0.86289353018248782)
+
+    def test_esty_ci(self):
+        def _diversity(indices, f):
+            """Calculate diversity index for each window of size 1.
+            indices: vector of indices of taxa
+            f: f(counts) -> diversity measure
+            """
+            result = []
+            max_size = max(indices) + 1
+            freqs = np.zeros(max_size, dtype=int)
+            for i in range(len(indices)):
+                freqs += np.bincount(indices[i:i + 1], minlength=max_size)
+                try:
+                    curr = f(freqs)
+                except (ZeroDivisionError, FloatingPointError):
+                    curr = 0
+                result.append(curr)
+            return np.array(result)
+
+        data = [1, 1, 2, 1, 1, 3, 2, 1, 3, 4]
+
+        observed_lower, observed_upper = zip(*_diversity(data, _esty_ci))
+
+        expected_lower = np.array([1, -1.38590382, -0.73353593, -0.17434465,
+                                   -0.15060902, -0.04386191, -0.33042054,
+                                   -0.29041008, -0.43554755, -0.33385652])
+        expected_upper = np.array([1, 1.38590382, 1.40020259, 0.67434465,
+                                   0.55060902, 0.71052858, 0.61613483,
+                                   0.54041008, 0.43554755, 0.53385652])
+
+        npt.assert_array_almost_equal(observed_lower, expected_lower)
+        npt.assert_array_almost_equal(observed_upper, expected_upper)
+
+    def test_simpson(self):
+        self.assertAlmostEqual(_simpsons_dominance(np.array([1, 0, 2, 5, 2])),
+                               0.66)
+        self.assertAlmostEqual(_simpsons_dominance(np.array([5])), 0)
+
+    def test_goods_coverage(self):
+        counts = [1] * 75 + [2, 2, 2, 2, 2, 2, 3, 4, 4]
+        obs = _goods_coverage(counts)
+        self.assertAlmostEqual(obs, 0.23469387755)
+
+    def test_margalef(self):
+
+        self.assertEqual(_margalef(np.array([0, 1, 1, 4, 2, 5, 2, 4, 1, 2])),
+                         8 / np.log(22))
+
+    def test_mcintosh_d(self):
+        self.assertAlmostEqual(_mcintosh_d(np.array([1, 2, 3])),
+                               0.636061424871458)
+
+    def test_strong(self):
+        self.assertAlmostEqual(_strong(np.array([1, 2, 3, 1])), 0.214285714)


=====================================
q2_diversity_lib/tests/test_alpha.py
=====================================
@@ -9,6 +9,7 @@
 from subprocess import CalledProcessError
 
 import numpy as np
+import numpy.testing as npt
 import pandas as pd
 import pandas.testing as pdt
 import biom
@@ -17,7 +18,12 @@ from qiime2.plugin.testing import TestPluginBase
 from qiime2 import Artifact
 
 from ..alpha import (pielou_evenness, observed_features,
-                     shannon_entropy, METRICS)
+                     shannon_entropy, METRICS,
+                     _berger_parker, _brillouin_d,
+                     _simpsons_dominance, _esty_ci,
+                     _goods_coverage, _margalef,
+                     _mcintosh_d, _strong
+                     )
 
 
 class SmokeTests(TestPluginBase):
@@ -154,7 +160,9 @@ class PielouEvennessTests(TestPluginBase):
                                          [0, 0, 0, 1, 0, 1]]),
                                ['A', 'B', 'C'],
                                ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
-        expected = pd.Series({'S5': 1, 'S6': 1}, name='pielou_evenness')
+        # pandas supports floating point correction for float dtype only,
+        # these 1 ints were changed to 1.0 floats to get that support
+        expected = pd.Series({'S5': 1.0, 'S6': 1.0}, name='pielou_evenness')
         actual = pielou_evenness(table=NaN_table, drop_undefined_samples=True)
         pdt.assert_series_equal(actual, expected, check_dtype=False)
 
@@ -250,3 +258,68 @@ class AlphaPassthroughTests(TestPluginBase):
         for metric in METRICS['NONPHYLO']['IMPL']:
             with self.assertRaisesRegex(TypeError, f"{metric}.*incompatible"):
                 self.method(table=self.crawford_tbl, metric=metric)
+
+# tests for passthrough metrics were sourced from skbio
+    def test_berger_parker_d(self):
+        self.assertEqual(_berger_parker(np.array([5, 5])), 0.5)
+        self.assertEqual(_berger_parker(np.array([1, 1, 1, 1, 0])), 0.25)
+
+    def test_brillouin_d(self):
+        self.assertAlmostEqual(_brillouin_d(np.array([1, 2, 0, 0, 3, 1])),
+                               0.86289353018248782)
+
+    def test_esty_ci(self):
+        def _diversity(indices, f):
+            """Calculate diversity index for each window of size 1.
+
+            indices: vector of indices of taxa
+            f: f(counts) -> diversity measure
+
+            """
+            result = []
+            max_size = max(indices) + 1
+            freqs = np.zeros(max_size, dtype=int)
+            for i in range(len(indices)):
+                freqs += np.bincount(indices[i:i + 1], minlength=max_size)
+                try:
+                    curr = f(freqs)
+                except (ZeroDivisionError, FloatingPointError):
+                    curr = 0
+                result.append(curr)
+            return np.array(result)
+
+        data = [1, 1, 2, 1, 1, 3, 2, 1, 3, 4]
+
+        observed_lower, observed_upper = zip(*_diversity(data, _esty_ci))
+
+        expected_lower = np.array([1, -1.38590382, -0.73353593, -0.17434465,
+                                   -0.15060902, -0.04386191, -0.33042054,
+                                   -0.29041008, -0.43554755, -0.33385652])
+        expected_upper = np.array([1, 1.38590382, 1.40020259, 0.67434465,
+                                   0.55060902, 0.71052858, 0.61613483,
+                                   0.54041008, 0.43554755, 0.53385652])
+
+        npt.assert_array_almost_equal(observed_lower, expected_lower)
+        npt.assert_array_almost_equal(observed_upper, expected_upper)
+
+    def test_simpson(self):
+        self.assertAlmostEqual(_simpsons_dominance(np.array([1, 0, 2, 5, 2])),
+                               0.66)
+        self.assertAlmostEqual(_simpsons_dominance(np.array([5])), 0)
+
+    def test_goods_coverage(self):
+        counts = [1] * 75 + [2, 2, 2, 2, 2, 2, 3, 4, 4]
+        obs = _goods_coverage(counts)
+        self.assertAlmostEqual(obs, 0.23469387755)
+
+    def test_margalef(self):
+
+        self.assertEqual(_margalef(np.array([0, 1, 1, 4, 2, 5, 2, 4, 1, 2])),
+                         8 / np.log(22))
+
+    def test_mcintosh_d(self):
+        self.assertAlmostEqual(_mcintosh_d(np.array([1, 2, 3])),
+                               0.636061424871458)
+
+    def test_strong(self):
+        self.assertAlmostEqual(_strong(np.array([1, 2, 3, 1])), 0.214285714)



View it on GitLab: https://salsa.debian.org/med-team/q2-diversity-lib/-/compare/3f1b61bc9258f399556931575b26a204db0628b2...168f4089abeb656040e19e85ab771232f03b076d

-- 
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/q2-diversity-lib/-/compare/3f1b61bc9258f399556931575b26a204db0628b2...168f4089abeb656040e19e85ab771232f03b076d
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240625/7e9cf84f/attachment-0001.htm>