[med-svn] [Git][med-team/q2-diversity-lib][upstream] New upstream version 2024.2.0

Andreas Tille (@tille) gitlab at salsa.debian.org
Sun Feb 18 13:47:15 GMT 2024



Andreas Tille pushed to branch upstream at Debian Med / q2-diversity-lib


Commits:
0a6126a1 by Andreas Tille at 2024-02-18T12:46:41+01:00
New upstream version 2024.2.0
- - - - -


10 changed files:

- .github/workflows/ci-dev.yaml
- README.md
- ci/recipe/meta.yaml
- q2_diversity_lib/_util.py
- q2_diversity_lib/_version.py
- q2_diversity_lib/alpha.py
- q2_diversity_lib/beta.py
- q2_diversity_lib/plugin_setup.py
- q2_diversity_lib/tests/test_beta.py
- q2_diversity_lib/tests/test_util.py


Changes:

=====================================
.github/workflows/ci-dev.yaml
=====================================
@@ -9,4 +9,4 @@ jobs:
   ci:
     uses: qiime2/distributions/.github/workflows/lib-ci-dev.yaml@dev
     with:
-      distro: core
\ No newline at end of file
+      distro: amplicon


=====================================
README.md
=====================================
@@ -1,5 +1,5 @@
 # q2-diversity-lib
 
-![](https://github.com/qiime2/q2-diversity-lib/workflows/ci/badge.svg)
+![](https://github.com/qiime2/q2-diversity-lib/workflows/ci-dev/badge.svg)
 
 This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
\ No newline at end of file


=====================================
ci/recipe/meta.yaml
=====================================
@@ -28,6 +28,7 @@ requirements:
     - qiime2 {{ qiime2_epoch }}.*
     - q2-types {{ qiime2_epoch }}.*
     - scikit-bio {{ scikit_bio }}
+    - scikit-learn {{ scikit_learn }}
     - scipy {{ scipy }}
     - unifrac {{ unifrac }}
     - unifrac-binaries {{ unifrac_binaries }}


=====================================
q2_diversity_lib/_util.py
=====================================
@@ -40,7 +40,7 @@ def _partition(table, block_size=100):
 
 
 @decorator
-def _disallow_empty_tables(wrapped_function, *args, **kwargs):
+def _validate_tables(wrapped_function, *args, **kwargs):
     bound_arguments = signature(wrapped_function).bind(*args, **kwargs)
     table = bound_arguments.arguments.get('table')
     if table is None:
@@ -66,6 +66,12 @@ def _disallow_empty_tables(wrapped_function, *args, **kwargs):
         if tab_obj.is_empty():
             raise ValueError("The provided table is empty")
 
+        if np.isnan(tab_obj.matrix_data.data).sum() > 0:
+            raise ValueError("The provided table contains NaN")
+
+        if (tab_obj.matrix_data.data < 0).sum() > 0:
+            raise ValueError("The provided table contains negative values")
+
     return wrapped_function(*args, **kwargs)
 
 
@@ -98,7 +104,7 @@ def _validate_requested_cpus(wrapped_function, *args, **kwargs):
 
     cpus_requested = b_a_arguments[param_name]
 
-    if cpus_requested == 'auto':
+    if cpus_requested == 0:
         # mutate bound_arguments.arguments 'auto' to the requested # of cpus...
         b_a_arguments[param_name] = cpus_available
         # ...and update cpus requested to prevent TypeError


=====================================
q2_diversity_lib/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 2023.9.0, Release-2023.9)"
-    git_full = "7f3dce4f02eaddca45bc0afb5819bb433127521a"
-    git_date = "2023-10-03 21:55:05 +0000"
+    git_refnames = " (tag: 2024.2.0, Release-2024.2)"
+    git_full = "ed3f3f3d0cd7f0dd93afe37540dd0833db438d53"
+    git_date = "2024-02-16 21:56:40 +0000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 


=====================================
q2_diversity_lib/alpha.py
=====================================
@@ -9,12 +9,13 @@
 import pandas as pd
 import skbio.diversity
 import biom
+import numpy as np
 
 from q2_types.feature_table import BIOMV210Format
 from q2_types.sample_data import AlphaDiversityFormat
 from q2_types.tree import NewickFormat
-from ._util import (_drop_undefined_samples, _partition,
-                    _disallow_empty_tables,
+
+from ._util import (_validate_tables,
                     _validate_requested_cpus,
                     _omp_cmd_wrapper)
 
@@ -44,7 +45,7 @@ METRICS = {
 
 
 # --------------------- Phylogenetic -----------------------------------------
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def faith_pd(table: BIOMV210Format, phylogeny: NewickFormat,
              threads: int = 1) -> AlphaDiversityFormat:
@@ -55,59 +56,64 @@ def faith_pd(table: BIOMV210Format, phylogeny: NewickFormat,
 
 
 # --------------------- Non-Phylogenetic -------------------------------------
-@_disallow_empty_tables
+def _skbio_alpha_diversity_from_1d(v, metric):
+    # alpha_diversity expects a 2d structure
+    v = np.reshape(v, (1, len(v)))
+    result = skbio.diversity.alpha_diversity(metric=metric,
+                                             counts=v,
+                                             ids=['placeholder', ],
+                                             validate=False)
+    return result.iloc[0]
+
+
+@_validate_tables
 def observed_features(table: biom.Table) -> pd.Series:
     presence_absence_table = table.pa(inplace=False)
-    return pd.Series(presence_absence_table.sum('sample').astype(int),
-                     index=table.ids(), name='observed_features')
+    results = []
+    for v in presence_absence_table.iter_data(dense=True):
+        results.append(_skbio_alpha_diversity_from_1d(v.astype(int),
+                                                      'observed_otus'))
+    results = pd.Series(results, index=table.ids(), name='observed_features')
+    return results
 
 
-@_disallow_empty_tables
+@_validate_tables
 def pielou_evenness(table: biom.Table,
                     drop_undefined_samples: bool = False) -> pd.Series:
     if drop_undefined_samples:
-        table = _drop_undefined_samples(table, minimum_nonzero_elements=2)
+        def transform_(v, i, m):
+            if (v > 0).sum() < 2:
+                return np.zeros(len(v))
+            else:
+                return v
+
+        table = table.transform(transform_, inplace=False).remove_empty()
 
     results = []
-    for partition in _partition(table):
-        counts = partition.matrix_data.T.toarray()
-        sample_ids = partition.ids(axis='sample')
-        results.append(skbio.diversity.alpha_diversity(metric='pielou_e',
-                                                       counts=counts,
-                                                       ids=sample_ids))
-    result = pd.concat(results)
-    result.name = 'pielou_evenness'
-    return result
-
-
-@_disallow_empty_tables
+    for v in table.iter_data(dense=True):
+        results.append(_skbio_alpha_diversity_from_1d(v, 'pielou_e'))
+    results = pd.Series(results, index=table.ids(), name='pielou_evenness')
+    return results
+
+
+@_validate_tables
 def shannon_entropy(table: biom.Table,
                     drop_undefined_samples: bool = False) -> pd.Series:
     if drop_undefined_samples:
-        table = _drop_undefined_samples(table, minimum_nonzero_elements=1)
+        table = table.remove_empty(inplace=False)
 
     results = []
-    for partition in _partition(table):
-        counts = partition.matrix_data.T.toarray()
-        sample_ids = partition.ids(axis='sample')
-        results.append(skbio.diversity.alpha_diversity(metric='shannon',
-                                                       counts=counts,
-                                                       ids=sample_ids))
-    result = pd.concat(results)
-    result.name = 'shannon_entropy'
-    return result
-
-
-@_disallow_empty_tables
+    for v in table.iter_data(dense=True):
+        results.append(_skbio_alpha_diversity_from_1d(v, 'shannon'))
+    results = pd.Series(results, index=table.ids(), name='shannon_entropy')
+    return results
+
+
+@_validate_tables
 def alpha_passthrough(table: biom.Table, metric: str) -> pd.Series:
     results = []
-    for partition in _partition(table):
-        counts = partition.matrix_data.astype(int).T.toarray()
-        sample_ids = partition.ids(axis='sample')
-
-        results.append(skbio.diversity.alpha_diversity(metric=metric,
-                                                       counts=counts,
-                                                       ids=sample_ids))
-    result = pd.concat(results)
-    result.name = metric
-    return result
+
+    for v in table.iter_data(dense=True):
+        results.append(_skbio_alpha_diversity_from_1d(v.astype(int), metric))
+    results = pd.Series(results, index=table.ids(), name=metric)
+    return results


=====================================
q2_diversity_lib/beta.py
=====================================
@@ -18,7 +18,8 @@ import numpy as np
 from q2_types.distance_matrix import LSMatFormat
 from q2_types.feature_table import BIOMV210Format
 from q2_types.tree import NewickFormat
-from ._util import (_disallow_empty_tables,
+
+from ._util import (_validate_tables,
                     _validate_requested_cpus,
                     _omp_cmd_wrapper)
 
@@ -51,7 +52,7 @@ METRICS = {
 
 
 # -------------------- Method Dispatch -----------------------
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def beta_passthrough(table: biom.Table, metric: str, pseudocount: int = 1,
                      n_jobs: int = 1) -> skbio.DistanceMatrix:
@@ -82,11 +83,11 @@ def beta_passthrough(table: biom.Table, metric: str, pseudocount: int = 1,
         pass
 
     return skbio.diversity.beta_diversity(
-            metric=metric, counts=counts, ids=sample_ids, validate=True,
+            metric=metric, counts=counts, ids=sample_ids, validate=False,
             pairwise_func=sklearn.metrics.pairwise_distances, n_jobs=n_jobs)
 
 
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def beta_phylogenetic_passthrough(table: BIOMV210Format,
                                   phylogeny: NewickFormat,
@@ -134,9 +135,7 @@ def beta_phylogenetic_passthrough(table: BIOMV210Format,
     return result
 
 
-# Note, this method doesn't have a corresponding cli invocation, so we'll
-# just rely on unifrac doing the right thing with `threads` here.
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def beta_phylogenetic_meta_passthrough(tables: BIOMV210Format,
                                        phylogenies: NewickFormat,
@@ -168,7 +167,7 @@ def beta_phylogenetic_meta_passthrough(tables: BIOMV210Format,
 
 
 # --------------------Non-Phylogenetic-----------------------
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def bray_curtis(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
     counts = table.matrix_data.toarray().T
@@ -177,13 +176,13 @@ def bray_curtis(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
         metric='braycurtis',
         counts=counts,
         ids=sample_ids,
-        validate=True,
+        validate=False,
         pairwise_func=sklearn.metrics.pairwise_distances,
         n_jobs=n_jobs
     )
 
 
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def jaccard(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
     counts = table.matrix_data.toarray().T
@@ -192,14 +191,14 @@ def jaccard(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
         metric='jaccard',
         counts=counts,
         ids=sample_ids,
-        validate=True,
+        validate=False,
         pairwise_func=sklearn.metrics.pairwise_distances,
         n_jobs=n_jobs
     )
 
 
 # ------------------------Phylogenetic-----------------------
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def unweighted_unifrac(table: BIOMV210Format,
                        phylogeny: NewickFormat,
@@ -223,7 +222,7 @@ def unweighted_unifrac(table: BIOMV210Format,
     return result
 
 
-@_disallow_empty_tables
+@_validate_tables
 @_validate_requested_cpus
 def weighted_unifrac(table: BIOMV210Format, phylogeny: NewickFormat,
                      threads: int = 1, bypass_tips: bool = False


=====================================
q2_diversity_lib/plugin_setup.py
=====================================
@@ -7,7 +7,7 @@
 # ----------------------------------------------------------------------------
 
 from qiime2.plugin import (Plugin, Citations, Bool, Int, Range, Choices, Str,
-                           Float, List)
+                           Float, List, Threads)
 from q2_types.feature_table import (FeatureTable, Frequency, RelativeFrequency,
                                     PresenceAbsence)
 from q2_types.tree import Phylogeny, Rooted
@@ -56,7 +56,7 @@ plugin.methods.register_function(
     inputs={'table': FeatureTable[Frequency | RelativeFrequency
             | PresenceAbsence],
             'phylogeny': Phylogeny[Rooted]},
-    parameters={'threads': Int % Range(1, None) | Str % Choices(['auto'])},
+    parameters={'threads': Threads},
     outputs=[('vector', SampleData[AlphaDiversity])],
     input_descriptions={
         'table': "The feature table containing the samples for which Faith's "
@@ -145,8 +145,8 @@ plugin.methods.register_function(
 # TODO: Augment citations as needed
 plugin.methods.register_function(
     function=beta.bray_curtis,
-    inputs={'table': FeatureTable[Frequency]},
-    parameters={'n_jobs': Int % Range(1, None) | Str % Choices(['auto'])},
+    inputs={'table': FeatureTable[Frequency | RelativeFrequency]},
+    parameters={'n_jobs': Threads},
     outputs=[('distance_matrix', DistanceMatrix)],
     input_descriptions={
         'table': "The feature table containing the samples for which "
@@ -174,7 +174,7 @@ plugin.methods.register_function(
     function=beta.jaccard,
     inputs={'table': FeatureTable[Frequency | RelativeFrequency
             | PresenceAbsence]},
-    parameters={'n_jobs': Int % Range(1, None) | Str % Choices(['auto'])},
+    parameters={'n_jobs': Threads},
     outputs=[('distance_matrix', DistanceMatrix)],
     input_descriptions={
         'table': "The feature table containing the samples for which "
@@ -202,7 +202,7 @@ plugin.methods.register_function(
     inputs={'table': FeatureTable[Frequency | RelativeFrequency
             | PresenceAbsence],
             'phylogeny': Phylogeny[Rooted]},
-    parameters={'threads': Int % Range(1, None) | Str % Choices(['auto']),
+    parameters={'threads': Threads,
                 'bypass_tips': Bool},
     outputs=[('distance_matrix', DistanceMatrix)],
     input_descriptions={
@@ -246,7 +246,7 @@ plugin.methods.register_function(
     function=beta.weighted_unifrac,
     inputs={'table': FeatureTable[Frequency | RelativeFrequency],
             'phylogeny': Phylogeny[Rooted]},
-    parameters={'threads': Int % Range(1, None) | Str % Choices(['auto']),
+    parameters={'threads': Threads,
                 'bypass_tips': Bool},
     outputs=[('distance_matrix', DistanceMatrix)],
     input_descriptions={
@@ -310,8 +310,7 @@ plugin.methods.register_function(
     function=beta.beta_passthrough,
     inputs={'table': FeatureTable[Frequency]},
     parameters={'metric': Str % Choices(beta.METRICS['NONPHYLO']['UNIMPL']),
-                'pseudocount': Int % Range(1, None),
-                'n_jobs': Int % Range(1, None) | Str % Choices(['auto'])},
+                'pseudocount': Int % Range(1, None), 'n_jobs': Threads},
     outputs=[('distance_matrix', DistanceMatrix)],
     input_descriptions={
         'table': 'The feature table containing the samples over which beta '
@@ -347,7 +346,7 @@ plugin.methods.register_function(
     inputs={'table': FeatureTable[Frequency],
             'phylogeny': Phylogeny[Rooted]},
     parameters={'metric': Str % Choices(beta.METRICS['PHYLO']['UNIMPL']),
-                'threads': Int % Range(1, None) | Str % Choices(['auto']),
+                'threads': Threads,
                 'variance_adjusted': Bool,
                 'alpha': Float % Range(0, 1, inclusive_end=True),
                 'bypass_tips': Bool},
@@ -412,13 +411,12 @@ plugin.methods.register_function(
     ]
 )
 
-
 plugin.methods.register_function(
     function=beta.beta_phylogenetic_meta_passthrough,
     inputs={'tables': List[FeatureTable[Frequency]],
             'phylogenies': List[Phylogeny[Rooted]]},
     parameters={'metric': Str % Choices(beta.METRICS['PHYLO']['UNIMPL']),
-                'threads': Int % Range(1, None) | Str % Choices(['auto']),
+                'threads': Threads,
                 'variance_adjusted': Bool,
                 'alpha': Float % Range(0, 1, inclusive_end=True),
                 'bypass_tips': Bool,


=====================================
q2_diversity_lib/tests/test_beta.py
=====================================
@@ -92,6 +92,33 @@ class BrayCurtisTests(TestPluginBase):
                     npt.assert_almost_equal(actual[id1, id2],
                                             self.expected[id1, id2])
 
+    def test_bray_curtis_relative_frequency(self):
+        input_table = biom.Table(
+            np.array([
+                [0.3, 0, 0.77, 0.5],
+                [0.1, 0, 0.15, 0.25],
+                [0.6, 1, 0.08, 0.25]
+            ]),
+            ['A', 'B', 'C'],
+            ['S1', 'S2', 'S3', 'S4']
+        )
+        expected = skbio.DistanceMatrix(
+            [
+                [0.0000000, 0.4, 0.52, 0.35],
+                [0.4, 0.0000000, 0.92, 0.75],
+                [0.52, 0.92, 0.0000000, 0.27],
+                [0.35, 0.75, 0.27, 0.0000000]
+            ],
+            ids=['S1', 'S2', 'S3', 'S4']
+        )
+        actual = bray_curtis(table=input_table, n_jobs=1)
+        self.assertEqual(actual.ids, self.expected.ids)
+        for id1 in actual.ids:
+            for id2 in actual.ids:
+                npt.assert_almost_equal(
+                    actual[id1, id2], expected[id1, id2]
+                )
+
 
 class JaccardTests(TestPluginBase):
     package = 'q2_diversity_lib.tests'
@@ -403,7 +430,7 @@ class BetaPassthroughTests(TestPluginBase):
                        ['S1', 'S2'])
         t = Artifact.import_data('FeatureTable[Frequency]', t)
 
-        with self.assertRaisesRegex(ValueError, 'cannot.*negative values'):
+        with self.assertRaisesRegex(ValueError, '.*negative values'):
             self.method(table=t, metric='canberra_adkins')
 
     def test_jensenshannon(self):


=====================================
q2_diversity_lib/tests/test_util.py
=====================================
@@ -17,8 +17,8 @@ from qiime2.plugin.testing import TestPluginBase
 from q2_types.feature_table import BIOMV210Format
 from q2_types.tree import NewickFormat
 
-from .._util import (_disallow_empty_tables, _validate_requested_cpus,
-                     _partition)
+from .._util import (_validate_requested_cpus,
+                     _partition, _validate_tables)
 
 
 class PartitionTests(TestPluginBase):
@@ -44,7 +44,7 @@ class PartitionTests(TestPluginBase):
         self.assertEqual(tab, partitions[0])
 
 
-class DisallowEmptyTablesTests(TestPluginBase):
+class ValidateTablesTests(TestPluginBase):
     package = 'q2_diversity_lib.tests'
 
     def setUp(self):
@@ -66,17 +66,34 @@ class DisallowEmptyTablesTests(TestPluginBase):
                                    self.invalid_view_type]
         self.has_empty_table_list = [self.empty_table_as_BIOMV210Format,
                                      self.valid_table_as_BIOMV210Format]
-
-        @_disallow_empty_tables
+        self.has_nan = biom.Table(np.array([[np.nan, 0, 1],
+                                            [2, 3, 4]]),
+                                  ['a', 'b'],
+                                  ['x', 'y', 'z'])
+        self.has_neg = biom.Table(np.array([[-1, 0, 1],
+                                            [2, 3, 4]]),
+                                  ['a', 'b'],
+                                  ['x', 'y', 'z'])
+
+        @_validate_tables
         def f1(table: biom.Table):
             pass
         self.function_with_table_param = f1
 
-        @_disallow_empty_tables
+        @_validate_tables
         def f2():
             pass
         self.function_without_table_param = f2
 
+    def test_pass_table_with_nan(self):
+        with self.assertRaisesRegex(ValueError, "table.*contains NaN"):
+            self.function_with_table_param(self.has_nan)
+
+    def test_pass_table_with_negative_values(self):
+        with self.assertRaisesRegex(ValueError,
+                                    "table.*contains negative values"):
+            self.function_with_table_param(self.has_neg)
+
     def test_pass_empty_table_positionally(self):
         with self.assertRaisesRegex(ValueError, "table.*is empty"):
             self.function_with_table_param(self.empty_table_as_BIOMV210Format)
@@ -97,12 +114,12 @@ class DisallowEmptyTablesTests(TestPluginBase):
 
     def test_decorated_lambda_with_table_param(self):
         with self.assertRaisesRegex(ValueError, "table.*is empty"):
-            decorated_lambda = _disallow_empty_tables(lambda table: None)
+            decorated_lambda = _validate_tables(lambda table: None)
             decorated_lambda(self.empty_table_as_BIOMV210Format)
 
     def test_decorated_lambda_with_table_param_list(self):
         with self.assertRaisesRegex(ValueError, "table.*is empty"):
-            decorated_lambda = _disallow_empty_tables(lambda table: None)
+            decorated_lambda = _validate_tables(lambda table: None)
             decorated_lambda(self.has_empty_table_list)
 
     def test_wrapped_function_has_no_table_param(self):
@@ -217,13 +234,13 @@ class ValidateRequestedCPUsTests(TestPluginBase):
         self.assertEqual(self.function_w_n_jobs_param(), 3)
 
     @mock.patch("q2_diversity_lib._util.psutil.Process")
-    def test_auto_passed_to_cpu_request(self, mock_process):
+    def test_zero_passed_to_cpu_request(self, mock_process):
         mock_process = psutil.Process()
         mock_process.cpu_affinity = mock.MagicMock(return_value=[0, 1, 2])
-        self.assertEqual(self.function_w_n_jobs_param('auto'), 3)
-        self.assertEqual(self.function_w_n_jobs_param(n_jobs='auto'), 3)
-        self.assertEqual(self.function_w_threads_param('auto'), 3)
-        self.assertEqual(self.function_w_threads_param(threads='auto'), 3)
+        self.assertEqual(self.function_w_n_jobs_param(0), 3)
+        self.assertEqual(self.function_w_n_jobs_param(n_jobs=0), 3)
+        self.assertEqual(self.function_w_threads_param(0), 3)
+        self.assertEqual(self.function_w_threads_param(threads=0), 3)
 
     @mock.patch("q2_diversity_lib._util.psutil.Process")
     def test_cpu_request_through_framework(self, mock_process):



View it on GitLab: https://salsa.debian.org/med-team/q2-diversity-lib/-/commit/0a6126a1185708e02ffff1b424600e3f99df71da

-- 
View it on GitLab: https://salsa.debian.org/med-team/q2-diversity-lib/-/commit/0a6126a1185708e02ffff1b424600e3f99df71da
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240218/3db20b2c/attachment-0001.htm>


More information about the debian-med-commit mailing list