[med-svn] [Git][med-team/q2-metadata][upstream] New upstream version 2022.8.0
Mohd Bilal (@rmb)
gitlab at salsa.debian.org
Wed Sep 7 13:28:55 BST 2022
Mohd Bilal pushed to branch upstream at Debian Med / q2-metadata
Commits:
7d00dcc9 by Mohammed Bilal at 2022-09-07T11:59:24+00:00
New upstream version 2022.8.0
- - - - -
6 changed files:
- ci/recipe/meta.yaml
- q2_metadata/__init__.py
- + q2_metadata/_random.py
- q2_metadata/_version.py
- q2_metadata/plugin_setup.py
- + q2_metadata/tests/test_random.py
Changes:
=====================================
ci/recipe/meta.yaml
=====================================
@@ -19,9 +19,9 @@ requirements:
run:
- python {{ python }}
- numpy
- - scipy
- - pandas
- - scikit-bio
+ - scipy {{ scipy }}
+ - pandas {{ pandas }}
+ - scikit-bio {{ scikit_bio }}
- qiime2 {{ qiime2_epoch }}.*
- q2templates {{ qiime2_epoch }}.*
- q2-types {{ qiime2_epoch }}.*
=====================================
q2_metadata/__init__.py
=====================================
@@ -8,9 +8,10 @@
from ._tabulate import tabulate
from ._distance import distance_matrix
+from ._random import shuffle_groups
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
-__all__ = ['tabulate', 'distance_matrix']
+__all__ = ['tabulate', 'distance_matrix', 'shuffle_groups']
=====================================
q2_metadata/_random.py
=====================================
@@ -0,0 +1,38 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import qiime2
+import numpy as np
+import pandas as pd
+
+
+def shuffle_groups(metadata: qiime2.CategoricalMetadataColumn,
+ n_columns: int = 3,
+ column_name_prefix: str = 'shuffled.grouping.',
+ column_value_prefix: str = 'fake.group.') -> pd.DataFrame:
+
+ input_column_name = metadata.name
+ df = metadata.to_dataframe()
+
+ value_mapping = {}
+ for i, value in enumerate(df[input_column_name].unique()):
+ value_mapping[value] = '%s%d' % (column_value_prefix, i)
+
+ first_column_id = '%s0' % column_name_prefix
+ df[first_column_id] = df[input_column_name].map(value_mapping)
+
+ df[first_column_id] = \
+ np.random.permutation(df[first_column_id].values)
+
+ for i in range(1, n_columns):
+ column_id = '%s%d' % (column_name_prefix, i)
+ df[column_id] = \
+ np.random.permutation(df[first_column_id].values)
+
+ df = df.drop(input_column_name, axis=1)
+ return df
=====================================
q2_metadata/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2022.2.0)"
- git_full = "47baad6a097b8b97591e7cf70acb89b26af1eb02"
- git_date = "2022-02-18 18:30:30 +0000"
+ git_refnames = " (tag: 2022.8.0)"
+ git_full = "671cbd512c4b9fe498e631ad250f80eb383331cc"
+ git_date = "2022-08-23 16:29:07 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_metadata/plugin_setup.py
=====================================
@@ -7,12 +7,17 @@
# ----------------------------------------------------------------------------
import qiime2.plugin
-from qiime2.plugin import MetadataColumn, Numeric
+from qiime2.plugin import (MetadataColumn, Numeric, SemanticType, Categorical,
+ Int, Str, ValidationError)
+import qiime2.plugin.model as model
import q2_metadata
-from q2_metadata import tabulate, distance_matrix
+from q2_metadata import tabulate, distance_matrix, shuffle_groups
from q2_types.distance_matrix import DistanceMatrix
+from q2_types.sample_data import SampleData
+
+import pandas as pd
plugin = qiime2.plugin.Plugin(
@@ -59,3 +64,77 @@ plugin.visualizers.register_function(
'visualization supports interactive filtering, sorting, and '
'exporting to common file formats.',
)
+
+ArtificialGrouping = \
+ SemanticType('ArtificialGrouping', variant_of=SampleData.field['type'])
+
+plugin.register_semantic_types(ArtificialGrouping)
+
+
+class ArtificialGroupingFormat(model.TextFileFormat):
+ def validate(self, *args):
+ try:
+ md = qiime2.Metadata.load(str(self))
+ except qiime2.metadata.MetadataFileError as md_exc:
+ raise ValidationError(md_exc) from md_exc
+
+ if md.column_count == 0:
+ raise ValidationError('Format must contain at least 1 column')
+
+ filtered_md = md.filter_columns(column_type='categorical')
+ if filtered_md.column_count != md.column_count:
+ raise ValidationError('Must only contain categorical values.')
+
+
+ArtificialGroupingDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'ArtificialGroupingDirectoryFormat', 'artificial-groupings.tsv',
+ ArtificialGroupingFormat)
+
+plugin.register_formats(ArtificialGroupingFormat,
+ ArtificialGroupingDirectoryFormat)
+
+plugin.register_semantic_type_to_format(
+ SampleData[ArtificialGrouping],
+ artifact_format=ArtificialGroupingDirectoryFormat)
+
+
+@plugin.register_transformer
+def _1(df: pd.DataFrame) -> (ArtificialGroupingFormat):
+ ff = ArtificialGroupingFormat()
+ md = qiime2.Metadata(df)
+ md.save(str(ff))
+ return ff
+
+
+@plugin.register_transformer
+def _2(ff: ArtificialGroupingFormat) -> (qiime2.Metadata):
+ return qiime2.Metadata.load(str(ff))
+
+
+plugin.methods.register_function(
+ function=shuffle_groups,
+ inputs={},
+ parameters={'metadata': MetadataColumn[Categorical],
+ 'n_columns': Int,
+ 'column_name_prefix': Str,
+ 'column_value_prefix': Str},
+ parameter_descriptions={
+ 'metadata': ('Categorical metadata column to shuffle.'),
+ 'n_columns': 'The number of shuffled metadata columns to create.',
+ 'column_name_prefix': ('Prefix to use in naming the shuffled '
+ 'metadata columns.'),
+ 'column_value_prefix': ('Prefix to use in naming the values in the '
+ 'shuffled metadata columns.')},
+ output_descriptions={
+ 'shuffled_groups': 'Randomized metadata columns'},
+ outputs=[('shuffled_groups', SampleData[ArtificialGrouping])],
+ name='Shuffle values in a categorical sample metadata column.',
+ description=('Create one or more categorical sample metadata '
+ 'columns by shuffling the values in an input metadata '
+ 'column. To avoid confusion, the column name and values '
+ 'will be derived from the provided prefixes. The number of '
+ 'different values (or groups), and the counts of each value, '
+ 'will match the input metadata column but the association of '
+ 'values with sample ids will be random. These data will be '
+ 'written to an artifact that can be used as sample metadata.')
+)
=====================================
q2_metadata/tests/test_random.py
=====================================
@@ -0,0 +1,196 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import unittest
+
+import pandas as pd
+import qiime2
+
+from q2_metadata import shuffle_groups
+
+
+class ShuffleGroupsTests(unittest.TestCase):
+
+ # number of iterations to run for tests of randomization
+ n_iterations = 500
+
+ def test_shuffle_groups_shape_41(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('groups' in obs.columns)
+ self.assertTrue('shuffled.grouping.0' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['shuffled.grouping.0'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(set(obs['shuffled.grouping.0'].unique()),
+ {'fake.group.1', 'fake.group.0'})
+
+ # distributions of value counts are equal in input and output
+ self.assertEqual(
+ sorted(list(obs['shuffled.grouping.0'].value_counts())),
+ sorted(list(md.to_series().value_counts())))
+
+ # randomization of key/value associations is occurring
+ random_check = []
+ for i in range(self.n_iterations):
+ obs2 = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ random_check.append(
+ list(obs['shuffled.grouping.0']) ==
+ list(obs2['shuffled.grouping.0']))
+ self.assertIn(False, random_check,
+ "All random groupings in %d iterations were "
+                      "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+
+ def test_shuffle_groups_shape_33(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c'],
+ name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3',
+ 'samplea', 'sampleb', 'sc',
+ 'sample1_w', 'ctl1', 'ctl3'],
+ name='id'))
+ )
+
+ # expected number of rows and columns
+ obs = shuffle_groups(md, n_columns=3,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (9, 3))
+
+ # original column name should not be in the result
+ self.assertFalse('groups' in obs.columns)
+
+ for i in range(3):
+ column_id = 'shuffled.grouping.%d' % i
+ self.assertTrue(column_id in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs[column_id].unique()), 3)
+
+ self.assertEqual(
+ set(obs[column_id].unique()),
+ {'fake.group.1', 'fake.group.0', 'fake.group.2'})
+
+ # randomization of key/value associations is occurring
+ random_check1 = []
+ random_check2 = []
+ random_check3 = []
+ for i in range(self.n_iterations):
+ random_check1.append(
+ list(obs['shuffled.grouping.0']) ==
+ list(obs['shuffled.grouping.1']))
+ random_check2.append(
+ list(obs['shuffled.grouping.0']) ==
+ list(obs['shuffled.grouping.2']))
+ random_check3.append(
+ list(obs['shuffled.grouping.1']) ==
+ list(obs['shuffled.grouping.2']))
+ self.assertIn(
+ False, random_check1,
+ "All random groupings in %d iterations were "
+        "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+ self.assertIn(
+ False, random_check2,
+ "All random groupings in %d iterations were "
+        "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+ self.assertIn(
+ False, random_check3,
+ "All random groupings in %d iterations were "
+        "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+
+ def test_shuffle_groups_alt_input_column_name(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='xyz',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('xyz' in obs.columns)
+ self.assertTrue('shuffled.grouping.0' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['shuffled.grouping.0'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(set(obs['shuffled.grouping.0'].unique()),
+ {'fake.group.1', 'fake.group.0'})
+
+ def test_shuffle_groups_alt_column_name_prefix(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='1',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('groups' in obs.columns)
+ self.assertTrue('10' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['10'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(set(obs['10'].unique()),
+ {'fake.group.1', 'fake.group.0'})
+
+ def test_shuffle_groups_alt_column_value_prefix(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='1')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('groups' in obs.columns)
+ self.assertTrue('shuffled.grouping.0' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['shuffled.grouping.0'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(
+ set(obs['shuffled.grouping.0'].unique()),
+ {'11', '10'})
View it on GitLab: https://salsa.debian.org/med-team/q2-metadata/-/commit/7d00dcc99af22c38adb2e0eb543fd206bc1fbe5d
--
View it on GitLab: https://salsa.debian.org/med-team/q2-metadata/-/commit/7d00dcc99af22c38adb2e0eb543fd206bc1fbe5d
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220907/dca9a9a5/attachment-0001.htm>
More information about the debian-med-commit
mailing list