[med-svn] [Git][med-team/q2-metadata][upstream] New upstream version 2022.8.0
Mohd Bilal (@rmb)
gitlab at salsa.debian.org
Wed Sep 7 13:28:55 BST 2022
Mohd Bilal pushed to branch upstream at Debian Med / q2-metadata
Commits:
7d00dcc9 by Mohammed Bilal at 2022-09-07T11:59:24+00:00
New upstream version 2022.8.0
- - - - -
6 changed files:
- ci/recipe/meta.yaml
- q2_metadata/__init__.py
- + q2_metadata/_random.py
- q2_metadata/_version.py
- q2_metadata/plugin_setup.py
- + q2_metadata/tests/test_random.py
Changes:
=====================================
ci/recipe/meta.yaml
=====================================
@@ -19,9 +19,9 @@ requirements:
run:
- python {{ python }}
- numpy
- - scipy
- - pandas
- - scikit-bio
+ - scipy {{ scipy }}
+ - pandas {{ pandas }}
+ - scikit-bio {{ scikit_bio }}
- qiime2 {{ qiime2_epoch }}.*
- q2templates {{ qiime2_epoch }}.*
- q2-types {{ qiime2_epoch }}.*
=====================================
q2_metadata/__init__.py
=====================================
@@ -8,9 +8,10 @@
from ._tabulate import tabulate
from ._distance import distance_matrix
+from ._random import shuffle_groups
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
-__all__ = ['tabulate', 'distance_matrix']
+__all__ = ['tabulate', 'distance_matrix', 'shuffle_groups']
=====================================
q2_metadata/_random.py
=====================================
@@ -0,0 +1,38 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import qiime2
+import numpy as np
+import pandas as pd
+
+
+def shuffle_groups(metadata: qiime2.CategoricalMetadataColumn,
+ n_columns: int = 3,
+ column_name_prefix: str = 'shuffled.grouping.',
+ column_value_prefix: str = 'fake.group.') -> pd.DataFrame:
+
+ input_column_name = metadata.name
+ df = metadata.to_dataframe()
+
+ value_mapping = {}
+ for i, value in enumerate(df[input_column_name].unique()):
+ value_mapping[value] = '%s%d' % (column_value_prefix, i)
+
+ first_column_id = '%s0' % column_name_prefix
+ df[first_column_id] = df[input_column_name].map(value_mapping)
+
+ df[first_column_id] = \
+ np.random.permutation(df[first_column_id].values)
+
+ for i in range(1, n_columns):
+ column_id = '%s%d' % (column_name_prefix, i)
+ df[column_id] = \
+ np.random.permutation(df[first_column_id].values)
+
+ df = df.drop(input_column_name, axis=1)
+ return df
=====================================
q2_metadata/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2022.2.0)"
- git_full = "47baad6a097b8b97591e7cf70acb89b26af1eb02"
- git_date = "2022-02-18 18:30:30 +0000"
+ git_refnames = " (tag: 2022.8.0)"
+ git_full = "671cbd512c4b9fe498e631ad250f80eb383331cc"
+ git_date = "2022-08-23 16:29:07 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_metadata/plugin_setup.py
=====================================
@@ -7,12 +7,17 @@
# ----------------------------------------------------------------------------
import qiime2.plugin
-from qiime2.plugin import MetadataColumn, Numeric
+from qiime2.plugin import (MetadataColumn, Numeric, SemanticType, Categorical,
+ Int, Str, ValidationError)
+import qiime2.plugin.model as model
import q2_metadata
-from q2_metadata import tabulate, distance_matrix
+from q2_metadata import tabulate, distance_matrix, shuffle_groups
from q2_types.distance_matrix import DistanceMatrix
+from q2_types.sample_data import SampleData
+
+import pandas as pd
plugin = qiime2.plugin.Plugin(
@@ -59,3 +64,77 @@ plugin.visualizers.register_function(
'visualization supports interactive filtering, sorting, and '
'exporting to common file formats.',
)
+
+ArtificialGrouping = \
+ SemanticType('ArtificialGrouping', variant_of=SampleData.field['type'])
+
+plugin.register_semantic_types(ArtificialGrouping)
+
+
+class ArtificialGroupingFormat(model.TextFileFormat):
+ def validate(self, *args):
+ try:
+ md = qiime2.Metadata.load(str(self))
+ except qiime2.metadata.MetadataFileError as md_exc:
+ raise ValidationError(md_exc) from md_exc
+
+ if md.column_count == 0:
+ raise ValidationError('Format must contain at least 1 column')
+
+ filtered_md = md.filter_columns(column_type='categorical')
+ if filtered_md.column_count != md.column_count:
+ raise ValidationError('Must only contain categorical values.')
+
+
+ArtificialGroupingDirectoryFormat = model.SingleFileDirectoryFormat(
+ 'ArtificialGroupingDirectoryFormat', 'artificial-groupings.tsv',
+ ArtificialGroupingFormat)
+
+plugin.register_formats(ArtificialGroupingFormat,
+ ArtificialGroupingDirectoryFormat)
+
+plugin.register_semantic_type_to_format(
+ SampleData[ArtificialGrouping],
+ artifact_format=ArtificialGroupingDirectoryFormat)
+
+
+@plugin.register_transformer
+def _1(df: pd.DataFrame) -> (ArtificialGroupingFormat):
+ ff = ArtificialGroupingFormat()
+ md = qiime2.Metadata(df)
+ md.save(str(ff))
+ return ff
+
+
+@plugin.register_transformer
+def _2(ff: ArtificialGroupingFormat) -> (qiime2.Metadata):
+ return qiime2.Metadata.load(str(ff))
+
+
+plugin.methods.register_function(
+ function=shuffle_groups,
+ inputs={},
+ parameters={'metadata': MetadataColumn[Categorical],
+ 'n_columns': Int,
+ 'column_name_prefix': Str,
+ 'column_value_prefix': Str},
+ parameter_descriptions={
+ 'metadata': ('Categorical metadata column to shuffle.'),
+ 'n_columns': 'The number of shuffled metadata columns to create.',
+ 'column_name_prefix': ('Prefix to use in naming the shuffled '
+ 'metadata columns.'),
+ 'column_value_prefix': ('Prefix to use in naming the values in the '
+ 'shuffled metadata columns.')},
+ output_descriptions={
+ 'shuffled_groups': 'Randomized metadata columns'},
+ outputs=[('shuffled_groups', SampleData[ArtificialGrouping])],
+ name='Shuffle values in a categorical sample metadata column.',
+ description=('Create one or more categorical sample metadata '
+ 'columns by shuffling the values in an input metadata '
+ 'column. To avoid confusion, the column name and values '
+ 'will be derived from the provided prefixes. The number of '
+ 'different values (or groups), and the counts of each value, '
+ 'will match the input metadata column but the association of '
+ 'values with sample ids will be random. These data will be '
+ 'written to an artifact that can be used as sample metadata.')
+)
=====================================
q2_metadata/tests/test_random.py
=====================================
@@ -0,0 +1,196 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2017-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import unittest
+
+import pandas as pd
+import qiime2
+
+from q2_metadata import shuffle_groups
+
+
+class ShuffleGroupsTests(unittest.TestCase):
+
+ # number of iterations to run for tests of randomization
+ n_iterations = 500
+
+ def test_shuffle_groups_shape_41(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('groups' in obs.columns)
+ self.assertTrue('shuffled.grouping.0' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['shuffled.grouping.0'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(set(obs['shuffled.grouping.0'].unique()),
+ {'fake.group.1', 'fake.group.0'})
+
+ # distributions of value counts are equal in input and output
+ self.assertEqual(
+ sorted(list(obs['shuffled.grouping.0'].value_counts())),
+ sorted(list(md.to_series().value_counts())))
+
+ # randomization of key/value associations is occurring
+ random_check = []
+ for i in range(self.n_iterations):
+ obs2 = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ random_check.append(
+ list(obs['shuffled.grouping.0']) ==
+ list(obs2['shuffled.grouping.0']))
+ self.assertIn(False, random_check,
+ "All random groupings in %d iterations were "
+                      "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+
+ def test_shuffle_groups_shape_33(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c'],
+ name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3',
+ 'samplea', 'sampleb', 'sc',
+ 'sample1_w', 'ctl1', 'ctl3'],
+ name='id'))
+ )
+
+ # expected number of rows and columns
+ obs = shuffle_groups(md, n_columns=3,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (9, 3))
+
+ # original column name should not be in the result
+ self.assertFalse('groups' in obs.columns)
+
+ for i in range(3):
+ column_id = 'shuffled.grouping.%d' % i
+ self.assertTrue(column_id in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs[column_id].unique()), 3)
+
+ self.assertEqual(
+ set(obs[column_id].unique()),
+ {'fake.group.1', 'fake.group.0', 'fake.group.2'})
+
+ # randomization of key/value associations is occurring
+ random_check1 = []
+ random_check2 = []
+ random_check3 = []
+ for i in range(self.n_iterations):
+ random_check1.append(
+ list(obs['shuffled.grouping.0']) ==
+ list(obs['shuffled.grouping.1']))
+ random_check2.append(
+ list(obs['shuffled.grouping.0']) ==
+ list(obs['shuffled.grouping.2']))
+ random_check3.append(
+ list(obs['shuffled.grouping.1']) ==
+ list(obs['shuffled.grouping.2']))
+ self.assertIn(
+ False, random_check1,
+ "All random groupings in %d iterations were "
+        "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+ self.assertIn(
+ False, random_check2,
+ "All random groupings in %d iterations were "
+        "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+ self.assertIn(
+ False, random_check3,
+ "All random groupings in %d iterations were "
+        "identical, suggesting that values are not "
+ "randomly assigned." % self.n_iterations)
+
+ def test_shuffle_groups_alt_input_column_name(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='xyz',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('xyz' in obs.columns)
+ self.assertTrue('shuffled.grouping.0' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['shuffled.grouping.0'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(set(obs['shuffled.grouping.0'].unique()),
+ {'fake.group.1', 'fake.group.0'})
+
+ def test_shuffle_groups_alt_column_name_prefix(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='1',
+ column_value_prefix='fake.group.')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('groups' in obs.columns)
+ self.assertTrue('10' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['10'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(set(obs['10'].unique()),
+ {'fake.group.1', 'fake.group.0'})
+
+ def test_shuffle_groups_alt_column_value_prefix(self):
+ md = qiime2.CategoricalMetadataColumn(
+ pd.Series(['a', 'b', 'a', 'b'], name='groups',
+ index=pd.Index(['sample1', 'sample2', 'sample3', 's4'],
+ name='id'))
+ )
+
+ # expected number of rows and columns in result
+ obs = shuffle_groups(md, n_columns=1,
+ column_name_prefix='shuffled.grouping.',
+ column_value_prefix='1')
+ self.assertEqual(obs.shape, (4, 1))
+
+ # expected column names (the original should not be in the result)
+ self.assertFalse('groups' in obs.columns)
+ self.assertTrue('shuffled.grouping.0' in obs.columns)
+
+ # correct number of groups in the new column
+ self.assertEqual(len(obs['shuffled.grouping.0'].unique()), 2)
+
+ # correct group names in new column
+ self.assertEqual(
+ set(obs['shuffled.grouping.0'].unique()),
+ {'11', '10'})
View it on GitLab: https://salsa.debian.org/med-team/q2-metadata/-/commit/7d00dcc99af22c38adb2e0eb543fd206bc1fbe5d
--
View it on GitLab: https://salsa.debian.org/med-team/q2-metadata/-/commit/7d00dcc99af22c38adb2e0eb543fd206bc1fbe5d
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220907/dca9a9a5/attachment-0001.htm>
More information about the debian-med-commit
mailing list