[med-svn] [Git][med-team/q2-metadata][upstream] New upstream version 2024.5.0+dfsg
Michael R. Crusoe (@crusoe)
gitlab at salsa.debian.org
Wed Jun 26 13:54:04 BST 2024
Michael R. Crusoe pushed to branch upstream at Debian Med / q2-metadata
Commits:
fb7fdbf9 by Michael R. Crusoe at 2024-06-26T14:28:59+02:00
New upstream version 2024.5.0+dfsg
- - - - -
3 changed files:
- q2_metadata/_merge.py
- q2_metadata/_version.py
- q2_metadata/tests/test_merge.py
Changes:
=====================================
q2_metadata/_merge.py
=====================================
@@ -19,21 +19,46 @@ def merge(metadata1: qiime2.Metadata,
n_overlapping_ids = len(overlapping_ids)
n_overlapping_columns = len(overlapping_columns)
- if len(overlapping_ids) > 0 and len(overlapping_columns) > 0:
- raise ValueError(f"Merging can currently handle overlapping ids "
- f"or overlapping columns, but not both. "
- f"{n_overlapping_ids} overlapping ids were "
- f"identified ({', '.join(overlapping_ids)}) and"
- f"{n_overlapping_columns} overlapping columns "
- f"were identified {', '.join(overlapping_columns)}.")
+ if n_overlapping_ids and n_overlapping_columns:
+ raise ValueError(
+ "Merging can currently handle overlapping ids or overlapping "
+ f"columns but not both. {n_overlapping_ids} overlapping ids were "
+ f"identified ({', '.join(overlapping_ids)}) and "
+ f"{n_overlapping_columns} overlapping columns were identified "
+ f"({', '.join(overlapping_columns)})."
+ )
df1 = metadata1.to_dataframe()
df2 = metadata2.to_dataframe()
- if n_overlapping_columns == 0:
+ if df1.index.name != df2.index.name:
+ raise ValueError(
+ "Metadata files contain different ID column names. "
+ f"Metadata1 file contains '{df1.index.name}' and metadata2 "
+ f"contains '{df2.index.name}'. These column names must match."
+ )
+
+ if not n_overlapping_columns:
result = pd.merge(df1, df2, how='outer', left_index=True,
right_index=True)
- else: # i.e., n_overlapping_ids == 0
+
+ else:
+ for column in overlapping_columns:
+ if df1[column].dtype != df2[column].dtype:
+ column_type1 = type(
+ qiime2.Metadata(df1[[column]]).get_column(column))
+ column_type2 = type(
+ qiime2.Metadata(df2[[column]]).get_column(column))
+ raise ValueError(
+ f"Metadata files contain the shared column '{column}' "
+ "with different type designations. "
+ f"In 'metadata1', the column '{column}' is of type "
+ f"'{column_type1.__name__}', "
+ f"and in 'metadata2', it is of type "
+ f"'{column_type2.__name__}'. These type designations must "
+ "match."
+ )
+
result = pd.merge(df1, df2, how='outer', left_index=True,
right_index=True, suffixes=('', '_'))
for c in overlapping_columns:
=====================================
q2_metadata/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2024.2.0, Release-2024.2)"
- git_full = "ad67a7ff4caab35b0b7aa265ce3f446d126b0b7c"
- git_date = "2024-02-16 21:58:48 +0000"
+ git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+ git_full = "ffce5b42f4a7458222d107c4184e729fea2d29f8"
+ git_date = "2024-05-29 04:16:40 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_metadata/tests/test_merge.py
=====================================
@@ -122,8 +122,6 @@ class MergeTests(unittest.TestCase):
columns=['col1', 'col2', 'col3']))
obs1 = merge(md1, md2)
- print(obs1.to_dataframe())
-
index_exp1 = pd.Index(['sample1', 'sample2', 'sample3',
'sample4', 'sample5', 'sample6'], name='id')
data_exp1 = [['a', 'd', 'h'],
@@ -135,8 +133,6 @@ class MergeTests(unittest.TestCase):
exp1 = qiime2.Metadata(
pd.DataFrame(data_exp1, index=index_exp1, dtype=object,
columns=['col1', 'col2', 'col3']))
-
- print(exp1.to_dataframe())
self.assertEqual(obs1, exp1)
def test_merge_some_columns_overlapping(self):
@@ -201,3 +197,58 @@ class MergeTests(unittest.TestCase):
'col4', 'col5', 'col6']))
self.assertEqual(obs1, exp1)
+
+ def test_merge_mismatched_columnID_names_in_error_message(self):
+ index1 = pd.Index(['sample1', 'sample2', 'sample3'], name='id')
+ data1 = [['a', 'd', 'h'],
+ ['b', 'e', 'i'],
+ ['c', 'f', 'j']]
+ md1 = qiime2.Metadata(pd.DataFrame(data1, index=index1, dtype=object,
+ columns=['col1', 'col2', 'col3']))
+
+ index2 = pd.Index(['sample4', 'sample5', 'sample6'], name='sample-id')
+ data2 = [['k', 'n', 'q'],
+ ['l', 'o', 'r'],
+ ['m', 'p', 's']]
+ md2 = qiime2.Metadata(pd.DataFrame(data2, index=index2, dtype=object,
+ columns=['col4', 'col5', 'col6']))
+
+ with self.assertRaisesRegex(
+ ValueError,
+ "Metadata files contain different ID column names.*id.*sample-id"
+ ):
+ merge(md1, md2)
+
+ def test_merge_mismatched_md_column_type_designations(self):
+ index1 = pd.Index(['sample1', 'sample2', 'sample3'], name='id')
+ data1 = [['a', 'd', 'h'],
+ ['b', 'e', 'i'],
+ ['c', 'f', 'j']]
+ md1 = qiime2.Metadata(
+ pd.DataFrame(
+ data1,
+ index=index1,
+ dtype=object,
+ columns=['col1', 'col2', 'col3']
+ )
+ )
+ index2 = pd.Index(['sample4', 'sample5', 'sample6'], name='id')
+ data2 = [['k', 'n', 40.0],
+ ['l', 'o', 41.0],
+ ['m', 'p', 42.0]]
+ md2 = qiime2.Metadata(
+ pd.DataFrame(
+ data2,
+ index=index2,
+ columns=['col1', 'col2', 'col3']
+ )
+ )
+ with self.assertRaisesRegex(
+ ValueError,
+ "Metadata files contain the shared column 'col3' with different "
+ "type designations. In 'metadata1', the column 'col3' is of type "
+ r"\'CategoricalMetadataColumn\', and in 'metadata2', it is of "
+ r"type \'NumericMetadataColumn\'. These type designations must "
+ "match."
+ ):
+ merge(md1, md2)
View it on GitLab: https://salsa.debian.org/med-team/q2-metadata/-/commit/fb7fdbf96b4f9124343b51480cf44c46a740dfe1
--
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/q2-metadata/-/commit/fb7fdbf96b4f9124343b51480cf44c46a740dfe1
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240626/42ca1758/attachment-0001.htm>
More information about the debian-med-commit mailing list