[med-svn] [Git][med-team/q2-sample-classifier][upstream] New upstream version 2024.5.0
Michael R. Crusoe (@crusoe)
gitlab at salsa.debian.org
Tue Jun 25 03:18:04 BST 2024
Michael R. Crusoe pushed to branch upstream at Debian Med / q2-sample-classifier
Commits:
ae87a6bf by Michael R. Crusoe at 2024-06-25T03:40:51+02:00
New upstream version 2024.5.0
- - - - -
7 changed files:
- q2_sample_classifier/_version.py
- q2_sample_classifier/classify.py
- q2_sample_classifier/tests/test_actions.py
- q2_sample_classifier/tests/test_estimators.py
- q2_sample_classifier/tests/test_types_formats_transformers.py
- q2_sample_classifier/utilities.py
- q2_sample_classifier/visuals.py
Changes:
=====================================
q2_sample_classifier/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2024.2.0, Release-2024.2)"
- git_full = "e32969bfe9c0e177ca0d5cfba270216c98bbbd9e"
- git_date = "2024-02-16 21:57:23 +0000"
+ git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+ git_full = "8fa3c948c14a8ff6668d92fa7cb05364679efc8f"
+ git_date = "2024-05-29 04:18:14 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
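
Background on this hunk: the three keyword strings are substituted by `git archive` when the release tarball is cut, and versioneer then parses the tag out of git_refnames. A minimal sketch of that parsing step, simplified from versioneer's git_versions_from_keywords() internals (quoted from memory, so treat the details as an approximation):

    refnames = " (tag: 2024.5.0, Release-2024.5)"
    refs = {r.strip() for r in refnames.strip("() ").split(",")}
    tags = {r[len("tag: "):] for r in refs if r.startswith("tag: ")}
    print(sorted(tags)[0])   # -> 2024.5.0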
=====================================
q2_sample_classifier/classify.py
=====================================
@@ -83,7 +83,7 @@ def metatable(ctx,
raise ValueError('Missing samples in metadata: %r' %
table_ids.difference(metadata_ids))
else:
- metadata = metadata.loc[sample_ids]
+ metadata = metadata.loc[list(sample_ids)]
if len(sample_ids) < len(table_ids):
tab = tab.filter(
ids_to_keep=sample_ids, axis='sample', inplace=False)
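
Context for the change above: newer pandas raises TypeError when .loc is given a set ("Passing a set as an indexer is not supported. Use a list instead."), so the set of shared IDs must be converted first. A minimal sketch with made-up metadata:

    import pandas as pd

    metadata = pd.DataFrame({'group': ['a', 'b', 'c']},
                            index=['s1', 's2', 's3'])
    sample_ids = {'s1', 's3'}                  # IDs shared by table and metadata
    # metadata.loc[sample_ids]                 # TypeError on current pandas
    subset = metadata.loc[list(sample_ids)]    # accepted by old and new pandas

Note that list(sample_ids) has arbitrary order, so downstream code should not rely on row order here.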
=====================================
q2_sample_classifier/tests/test_actions.py
=====================================
@@ -59,7 +59,7 @@ class NowLetsTestTheActions(SampleClassifierTestPluginBase):
self.assertEqual(y_train.name, 'bugs')
# test if complete target column is covered
- y_all = y_train.append(y_test).sort_index()
+ y_all = pd.concat([y_train, y_test]).sort_index()
y_all.index.name = 'SampleID'
pdt.assert_series_equal(y_all, self.md._series)
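
pandas 2.0 removed Series.append() (deprecated since 1.4); pd.concat() over a list of series is the drop-in replacement used here. A sketch with made-up train/test splits:

    import pandas as pd

    y_train = pd.Series(['a', 'b'], index=['s1', 's3'], name='bugs')
    y_test = pd.Series(['c'], index=['s2'], name='bugs')
    # y_all = y_train.append(y_test)                   # gone in pandas >= 2.0
    y_all = pd.concat([y_train, y_test]).sort_index()  # same result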
=====================================
q2_sample_classifier/tests/test_estimators.py
=====================================
@@ -117,7 +117,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
index_col=0, names=['feature', 'importance'])
self.exp_pred = pd.read_csv(
self.get_data_path('predictions.tsv'), sep='\t', header=0,
- index_col=0, squeeze=True)
+ index_col=0).squeeze('columns')
index = pd.Index(['A', 'B', 'C', 'D'], name='id')
self.table_percnorm = qiime2.Artifact.import_data(
FeatureTable[PercentileNormalized], pd.DataFrame(
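
The squeeze keyword of read_csv was deprecated in pandas 1.4 and removed in 2.0; calling .squeeze('columns') on the one-column result yields the same Series. A self-contained sketch (the same pattern recurs in the transformer tests below):

    import io
    import pandas as pd

    tsv = "id\tprediction\ns1\t0.5\ns2\t0.9\n"
    # pd.read_csv(..., squeeze=True)           # keyword removed in pandas 2.0
    pred = pd.read_csv(io.StringIO(tsv), sep='\t', header=0,
                       index_col=0).squeeze('columns')
    assert isinstance(pred, pd.Series)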
@@ -135,7 +135,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
dv = DictVectorizer()
dv.fit(dicts)
features = table.ids('observation')
- self.assertEqual(set(dv.get_feature_names()), set(features))
+ self.assertEqual(set(dv.get_feature_names_out()), set(features))
self.assertEqual(len(dicts), len(table.ids()))
for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
for feature, count in zip(features, table_row):
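
get_feature_names() was removed from scikit-learn transformers in 1.2 in favour of get_feature_names_out(), which returns an ndarray of strings rather than a list; wrapping both sides in set(), as the test does, keeps the comparison independent of that container change. Sketch:

    from sklearn.feature_extraction import DictVectorizer

    dv = DictVectorizer()
    dv.fit([{'featA': 1.0, 'featB': 2.0}])
    # dv.get_feature_names()                   # removed in scikit-learn 1.2
    names = dv.get_feature_names_out()         # ndarray(['featA', 'featB'])
    assert set(names) == {'featA', 'featB'}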
@@ -398,7 +398,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
parameter_tuning=True, classification=True,
missing_samples='ignore', base_estimator="DecisionTree")
self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
- self.assertEqual(type(abe.named_steps.est.base_estimator),
+ self.assertEqual(type(abe.named_steps.est.estimator),
DecisionTreeClassifier)
def test_train_adaboost_extra_trees(self):
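
Likewise, scikit-learn 1.2 renamed AdaBoost's base_estimator to estimator (the old spelling was removed in 1.4), which affects both the fitted attribute asserted here and the constructor argument used in utilities.py below. Sketch:

    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.tree import DecisionTreeClassifier

    abe = AdaBoostClassifier(estimator=DecisionTreeClassifier(),
                             n_estimators=10, random_state=0)
    # abe.base_estimator                       # gone in scikit-learn >= 1.4
    assert type(abe.estimator) is DecisionTreeClassifier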
@@ -408,7 +408,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
parameter_tuning=True, classification=True,
missing_samples='ignore', base_estimator="ExtraTrees")
self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
- self.assertEqual(type(abe.named_steps.est.base_estimator),
+ self.assertEqual(type(abe.named_steps.est.estimator),
ExtraTreeClassifier)
# test some invalid inputs/edge cases
@@ -504,7 +504,7 @@ class EstimatorsTests(SampleClassifierTestPluginBase):
ls_pred_classes = prob.columns.tolist()
ls_correct_range = [col for col in ls_pred_classes if
prob[col].between(
- 0, 1, inclusive=True).all()]
+ 0, 1, inclusive="both").all()]
self.assertEqual(len(ls_correct_range), prob.shape[1],
msg='Predicted probabilities of class {}'
'are not in range [0,1]'.format(
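
Boolean values for the inclusive argument of Series.between() were deprecated in pandas 1.3 and removed in 2.0; the accepted values are now the strings "both", "neither", "left" and "right". Sketch:

    import pandas as pd

    prob = pd.Series([0.0, 0.25, 1.0])
    # prob.between(0, 1, inclusive=True)       # rejected by pandas >= 2.0
    assert prob.between(0, 1, inclusive="both").all()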
=====================================
q2_sample_classifier/tests/test_types_formats_transformers.py
=====================================
@@ -85,7 +85,7 @@ class TestSemanticTypes(SampleClassifierTestPluginBase):
name='outlier', index=exp_index)
obs = transformer(exp)
obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
- squeeze=True)
+ ).squeeze('columns')
self.assertEqual(sorted(exp), sorted(obs))
def test_boolean_format_to_pd_series(self):
@@ -152,7 +152,7 @@ class TestSemanticTypes(SampleClassifierTestPluginBase):
name='prediction', index=['a', 'b', 'c', 'd'])
obs = transformer(exp)
obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
- squeeze=True)
+ ).squeeze('columns')
pdt.assert_series_equal(obs, exp)
def test_pd_series_to_Predictions_format_allow_nans(self):
@@ -161,7 +161,7 @@ class TestSemanticTypes(SampleClassifierTestPluginBase):
name='prediction', index=['a', 'b', 'c', 'd'])
obs = transformer(exp)
obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
- squeeze=True)
+ ).squeeze('columns')
pdt.assert_series_equal(obs, exp)
def test_Predictions_format_to_pd_series(self):
=====================================
q2_sample_classifier/utilities.py
=====================================
@@ -239,7 +239,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
# Describe top features
n_opt = rfecv.named_steps.est.n_features_
importance = _extract_important_features(
- rfecv.named_steps.dv.get_feature_names(),
+ rfecv.named_steps.dv.get_feature_names_out(),
rfecv.named_steps.est.ranking_)
importance = sort_importances(importance, ascending=True)[:n_opt]
@@ -249,16 +249,17 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
def _extract_rfe_scores(rfecv):
+ grid_scores_ = rfecv.cv_results_['mean_test_score']
n_features = len(rfecv.ranking_)
# If using fractional step, step = integer of fraction * n_features
if rfecv.step < 1:
rfecv.step = int(rfecv.step * n_features)
- # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
+ # Need to manually calculate x-axis, grid_scores_ is a 1-d array
x = [n_features - (n * rfecv.step)
- for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
+ for n in range(len(grid_scores_)-1, -1, -1)]
if x[0] < 1:
x[0] = 1
- return pd.Series(rfecv.grid_scores_, index=x, name='Accuracy')
+ return pd.Series(grid_scores_, index=x, name='Accuracy')
def nested_cross_validation(table, metadata, cv, random_state, n_jobs,
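
RFECV.grid_scores_ was removed in scikit-learn 1.2; the per-step mean cross-validation scores now live in rfecv.cv_results_['mean_test_score'], which is what the new local variable reads. A runnable sketch on synthetic data:

    from sklearn.datasets import make_classification
    from sklearn.feature_selection import RFECV
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(n_samples=60, n_features=8, random_state=0)
    rfecv = RFECV(LogisticRegression(max_iter=1000), step=1, cv=3).fit(X, y)
    # rfecv.grid_scores_                            # removed in scikit-learn 1.2
    scores = rfecv.cv_results_['mean_test_score']   # one mean score per step
    assert len(scores) == 8                         # n_features steps with step=1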
@@ -411,12 +412,12 @@ def _calculate_feature_importances(estimator):
# feature_importances_ or coef_ to report feature importance/weights
try:
importances = _extract_important_features(
- estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.dv.get_feature_names_out(),
estimator.named_steps.est.feature_importances_)
# is there a better way to determine whether estimator has coef_ ?
except AttributeError:
importances = _extract_important_features(
- estimator.named_steps.dv.get_feature_names(),
+ estimator.named_steps.dv.get_feature_names_out(),
estimator.named_steps.est.coef_)
return importances
@@ -718,7 +719,7 @@ def _mean_feature_importance(importances):
def _null_feature_importance(table):
feature_extractor = DictVectorizer()
feature_extractor.fit(table)
- imp = pd.DataFrame(index=feature_extractor.get_feature_names())
+ imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
imp.index.name = "feature"
imp["importance"] = 1
return imp
@@ -827,8 +828,9 @@ def _train_adaboost_base_estimator(table, metadata, column, base_estimator,
return Pipeline(
[('dv', estimator.named_steps.dv),
- ('est', adaboost_estimator(estimator.named_steps.est,
- n_estimators, random_state=random_state))])
+ ('est', adaboost_estimator(estimator=estimator.named_steps.est,
+ n_estimators=n_estimators,
+ random_state=random_state))])
def _disable_feature_selection(estimator, optimize_feature_selection):
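
The constructor call above also switches to keyword arguments throughout: besides the base_estimator-to-estimator rename, current scikit-learn declares everything after the first parameter keyword-only, so passing n_estimators positionally raises a TypeError. Sketch:

    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.tree import DecisionTreeClassifier

    est = DecisionTreeClassifier()
    # AdaBoostClassifier(est, 50, random_state=0)   # TypeError: keyword-only
    abe = AdaBoostClassifier(estimator=est, n_estimators=50, random_state=0)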
=====================================
q2_sample_classifier/visuals.py
=====================================
@@ -167,9 +167,9 @@ def _plot_confusion_matrix(y_test, y_pred, classes, normalize, palette,
predictions.loc["Overall Accuracy"] = ""
predictions.loc["Baseline Accuracy"] = ""
predictions.loc["Accuracy Ratio"] = ""
- predictions.loc["Overall Accuracy"]["Overall Accuracy"] = accuracy
- predictions.loc["Baseline Accuracy"]["Overall Accuracy"] = basline_accuracy
- predictions.loc["Accuracy Ratio"]["Overall Accuracy"] = accuracy_ratio
+ predictions.loc["Overall Accuracy", "Overall Accuracy"] = accuracy
+ predictions.loc["Baseline Accuracy", "Overall Accuracy"] = basline_accuracy
+ predictions.loc["Accuracy Ratio", "Overall Accuracy"] = accuracy_ratio
return predictions, confusion
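
The visuals.py fix replaces chained indexing with a single .loc call: predictions.loc[row][col] = value first materialises the row (often as a copy) and then assigns into that temporary, so the write may never reach the frame, whereas .loc[row, col] assigns in place. Sketch:

    import pandas as pd

    predictions = pd.DataFrame('', index=['Overall Accuracy'],
                               columns=['Overall Accuracy'])
    # predictions.loc['Overall Accuracy']['Overall Accuracy'] = 0.9  # may hit a copy
    predictions.loc['Overall Accuracy', 'Overall Accuracy'] = 0.9    # writes the frame
    assert predictions.iloc[0, 0] == 0.9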
View it on GitLab: https://salsa.debian.org/med-team/q2-sample-classifier/-/commit/ae87a6bfa20df424d76483b3f9771a3c136c4abf