[med-svn] [Git][med-team/q2-sample-classifier][upstream] New upstream version 2024.2.0

Andreas Tille (@tille) gitlab@salsa.debian.org
Sun Feb 18 17:03:37 GMT 2024



Andreas Tille pushed to branch upstream at Debian Med / q2-sample-classifier


Commits:
4f644668 by Andreas Tille at 2024-02-18T15:53:56+01:00
New upstream version 2024.2.0
- - - - -


8 changed files:

- .github/workflows/ci-dev.yaml
- README.md
- q2_sample_classifier/_transformer.py
- q2_sample_classifier/_version.py
- q2_sample_classifier/classify.py
- q2_sample_classifier/plugin_setup.py
- q2_sample_classifier/tests/test_types_formats_transformers.py
- q2_sample_classifier/utilities.py


Changes:

=====================================
.github/workflows/ci-dev.yaml
=====================================
@@ -9,4 +9,4 @@ jobs:
   ci:
     uses: qiime2/distributions/.github/workflows/lib-ci-dev.yaml@dev
     with:
-      distro: core
\ No newline at end of file
+      distro: amplicon


=====================================
README.md
=====================================
@@ -1,5 +1,5 @@
 # q2-sample-classifier
 
-![](https://github.com/qiime2/q2-sample-classifier/workflows/ci/badge.svg)
+![](https://github.com/qiime2/q2-sample-classifier/workflows/ci-dev/badge.svg)
 
 This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
\ No newline at end of file


=====================================
q2_sample_classifier/_transformer.py
=====================================
@@ -136,7 +136,26 @@ def _a(dirfmt: SampleEstimatorDirFmt) -> Pipeline:
     with tarfile.open(str(sklearn_pipeline)) as tar:
         tmpdir = model.DirectoryFormat()
         dirname = str(tmpdir)
-        tar.extractall(dirname)
+
+        def is_within_directory(directory, target):
+
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+
+            prefix = os.path.commonprefix([abs_directory, abs_target])
+
+            return prefix == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+        safe_extract(tar, dirname)
         pipeline = joblib.load(os.path.join(dirname, 'sklearn_pipeline.pkl'))
         for fn in tar.getnames():
             os.unlink(os.path.join(dirname, fn))

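The safe_extract wrapper added above replaces the bare tar.extractall() call and guards against tar path traversal ("tar slip"): each member's destination is resolved to an absolute path and must remain inside the extraction directory before anything is unpacked. A minimal standalone sketch of the same check, using illustrative paths only:

    import os

    def is_within_directory(directory, target):
        # Resolve both paths so a member name such as "../../etc/passwd"
        # cannot masquerade as a child of the extraction directory.
        abs_directory = os.path.abspath(directory)
        abs_target = os.path.abspath(target)
        return os.path.commonprefix([abs_directory, abs_target]) == abs_directory

    print(is_within_directory("/tmp/extract", "/tmp/extract/model.pkl"))      # True
    print(is_within_directory("/tmp/extract", "/tmp/extract/../escape.pkl"))  # False
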

=====================================
q2_sample_classifier/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 2023.9.0, Release-2023.9)"
-    git_full = "8c6fb31849f929d00ae6b7a5b6b92fd1cfebb10b"
-    git_date = "2023-10-03 22:04:15 +0000"
+    git_refnames = " (tag: 2024.2.0, Release-2024.2)"
+    git_full = "e32969bfe9c0e177ca0d5cfba270216c98bbbd9e"
+    git_date = "2024-02-16 21:57:23 +0000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 


=====================================
q2_sample_classifier/classify.py
=====================================
@@ -15,6 +15,7 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import Pipeline
 
 import qiime2
+from qiime2.plugin import get_available_cores
 import pandas as pd
 import biom
 import skbio
@@ -107,6 +108,9 @@ def _fit_predict_knn_cv(
         x: pd.DataFrame, y: pd.Series, k: int, cv: int,
         random_state: int, n_jobs: int
 ) -> (pd.Series, pd.Series):
+    if n_jobs == 0:
+        n_jobs = get_available_cores()
+
     kf = KFold(n_splits=cv, shuffle=True, random_state=random_state)
 
     # train and test with CV
@@ -291,6 +295,9 @@ def fit_regressor(table: biom.Table,
 
 
 def predict_base(table, sample_estimator, n_jobs):
+    if n_jobs == 0:
+        n_jobs = get_available_cores()
+
     # extract feature data from biom
     feature_data = _extract_features(table)
     index = table.ids()

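Both hunks above introduce the same convention: an n_jobs value of 0 is expanded to the machine's available core count via the new qiime2.plugin.get_available_cores helper before it reaches scikit-learn. A minimal sketch of the pattern (the resolve_n_jobs wrapper name is illustrative only, not part of the commit):

    from qiime2.plugin import get_available_cores

    def resolve_n_jobs(n_jobs: int) -> int:
        # 0 is the sentinel that comes with the Threads parameter type:
        # expand it to every core QIIME 2 reports as available; any other
        # value is passed through unchanged.
        if n_jobs == 0:
            n_jobs = get_available_cores()
        return n_jobs
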

=====================================
q2_sample_classifier/plugin_setup.py
=====================================
@@ -10,10 +10,10 @@ import importlib
 
 from qiime2.plugin import (
     Int, Str, Float, Range, Bool, Plugin, Metadata, Choices, MetadataColumn,
-    Numeric, Categorical, Citations, Visualization, TypeMatch)
+    Numeric, Categorical, Citations, Visualization, TypeMatch, Threads)
 from q2_types.feature_table import (
     FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Balance,
-    PercentileNormalized, Design)
+    PercentileNormalized, Design, Composition)
 from q2_types.sample_data import SampleData
 from q2_types.feature_data import FeatureData
 from q2_types.distance_matrix import DistanceMatrix
@@ -89,7 +89,8 @@ predict_description = (
     'contain overlapping features with the feature table used to train '
     'the estimator.')
 
-inputs = {'table': FeatureTable[Frequency]}
+inputs = {'table': FeatureTable[
+    Frequency | RelativeFrequency | PresenceAbsence | Composition]}
 
 input_descriptions = {'table': 'Feature table containing all features that '
                                'should be used for target prediction.',
@@ -99,7 +100,7 @@ input_descriptions = {'table': 'Feature table containing all features that '
 parameters = {
     'base': {
         'random_state': Int,
-        'n_jobs': Int,
+        'n_jobs': Threads,
         'n_estimators': Int % Range(1, None),
         'missing_samples': Str % Choices(['error', 'ignore'])},
     'splitter': {
@@ -492,7 +493,7 @@ plugin.visualizers.register_function(
 
 
 T = TypeMatch([Frequency, RelativeFrequency, PresenceAbsence, Balance,
-               PercentileNormalized, Design])
+               PercentileNormalized, Design, Composition])
 plugin.methods.register_function(
     function=split_table,
     inputs={'table': FeatureTable[T]},

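With the shared inputs dict above now accepting relative-frequency, presence/absence, and composition tables, and n_jobs registered as Threads, callers can pass those table types directly and request all cores with 0. A hypothetical Artifact-API sketch, assuming this shared signature is the one used when registering the classify-samples pipeline and that the named input files exist:

    from qiime2 import Artifact, Metadata
    from qiime2.plugins.sample_classifier.actions import classify_samples

    table = Artifact.load('rel-freq-table.qza')   # e.g. FeatureTable[RelativeFrequency]
    column = Metadata.load('sample-metadata.tsv').get_column('body-site')

    results = classify_samples(
        table=table,
        metadata=column,
        n_jobs=0,   # Threads sentinel: use all available cores
    )
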

=====================================
q2_sample_classifier/tests/test_types_formats_transformers.py
=====================================
@@ -421,7 +421,28 @@ class TestTransformers(SampleEstimatorTestBase):
         def read_pipeline(pipeline_filepath):
             with tarfile.open(pipeline_filepath) as tar:
                 dirname = tempfile.mkdtemp()
-                tar.extractall(dirname)
+
+                def is_within_directory(directory, target):
+
+                    abs_directory = os.path.abspath(directory)
+                    abs_target = os.path.abspath(target)
+
+                    prefix = os.path.commonprefix([abs_directory, abs_target])
+
+                    return prefix == abs_directory
+
+                def safe_extract(tar, path=".", members=None, *,
+                                 numeric_owner=False):
+
+                    for member in tar.getmembers():
+                        member_path = os.path.join(path, member.name)
+                        if not is_within_directory(path, member_path):
+                            raise Exception("Attempted Path Traversal in Tar"
+                                            "File")
+
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                safe_extract(tar, dirname)
                 pipeline = joblib.load(os.path.join(dirname,
                                        'sklearn_pipeline.pkl'))
                 for fn in tar.getnames():


=====================================
q2_sample_classifier/utilities.py
=====================================
@@ -27,6 +27,7 @@ from sklearn.tree import (
 )
 from sklearn.pipeline import Pipeline
 
+from qiime2.plugin import get_available_cores
 import q2templates
 import pandas as pd
 import numpy as np
@@ -264,6 +265,9 @@ def nested_cross_validation(table, metadata, cv, random_state, n_jobs,
                             n_estimators, estimator, stratify,
                             parameter_tuning, classification, scoring,
                             missing_samples='error'):
+    if n_jobs == 0:
+        n_jobs = get_available_cores()
+
     # extract column name from NumericMetadataColumn
     column = metadata.name
 
@@ -301,6 +305,9 @@ def _fit_estimator(features, targets, estimator, n_estimators=100, step=0.05,
                    cv=5, random_state=None, n_jobs=1,
                    optimize_feature_selection=False, parameter_tuning=False,
                    missing_samples='error', classification=True):
+    if n_jobs == 0:
+        n_jobs = get_available_cores()
+
     # extract column name from CategoricalMetadataColumn
     column = targets.to_series().name
 



View it on GitLab: https://salsa.debian.org/med-team/q2-sample-classifier/-/commit/4f644668c22494fee3cfce04fe208b5a3301592c
