[med-svn] [Git][med-team/q2-sample-classifier][upstream] New upstream version 2024.2.0
Andreas Tille (@tille)
gitlab at salsa.debian.org
Sun Feb 18 17:03:37 GMT 2024
Andreas Tille pushed to branch upstream at Debian Med / q2-sample-classifier
Commits:
4f644668 by Andreas Tille at 2024-02-18T15:53:56+01:00
New upstream version 2024.2.0
- - - - -
8 changed files:
- .github/workflows/ci-dev.yaml
- README.md
- q2_sample_classifier/_transformer.py
- q2_sample_classifier/_version.py
- q2_sample_classifier/classify.py
- q2_sample_classifier/plugin_setup.py
- q2_sample_classifier/tests/test_types_formats_transformers.py
- q2_sample_classifier/utilities.py
Changes:
=====================================
.github/workflows/ci-dev.yaml
=====================================
@@ -9,4 +9,4 @@ jobs:
ci:
uses: qiime2/distributions/.github/workflows/lib-ci-dev.yaml@dev
with:
- distro: core
\ No newline at end of file
+ distro: amplicon
=====================================
README.md
=====================================
@@ -1,5 +1,5 @@
# q2-sample-classifier
-
+
This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
\ No newline at end of file
=====================================
q2_sample_classifier/_transformer.py
=====================================
@@ -136,7 +136,26 @@ def _a(dirfmt: SampleEstimatorDirFmt) -> Pipeline:
with tarfile.open(str(sklearn_pipeline)) as tar:
tmpdir = model.DirectoryFormat()
dirname = str(tmpdir)
- tar.extractall(dirname)
+
+ def is_within_directory(directory, target):
+
+ abs_directory = os.path.abspath(directory)
+ abs_target = os.path.abspath(target)
+
+ prefix = os.path.commonprefix([abs_directory, abs_target])
+
+ return prefix == abs_directory
+
+ def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+
+ for member in tar.getmembers():
+ member_path = os.path.join(path, member.name)
+ if not is_within_directory(path, member_path):
+ raise Exception("Attempted Path Traversal in Tar File")
+
+ tar.extractall(path, members, numeric_owner=numeric_owner)
+
+ safe_extract(tar, dirname)
pipeline = joblib.load(os.path.join(dirname, 'sklearn_pipeline.pkl'))
for fn in tar.getnames():
os.unlink(os.path.join(dirname, fn))
=====================================
q2_sample_classifier/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2023.9.0, Release-2023.9)"
- git_full = "8c6fb31849f929d00ae6b7a5b6b92fd1cfebb10b"
- git_date = "2023-10-03 22:04:15 +0000"
+ git_refnames = " (tag: 2024.2.0, Release-2024.2)"
+ git_full = "e32969bfe9c0e177ca0d5cfba270216c98bbbd9e"
+ git_date = "2024-02-16 21:57:23 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_sample_classifier/classify.py
=====================================
@@ -15,6 +15,7 @@ from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
import qiime2
+from qiime2.plugin import get_available_cores
import pandas as pd
import biom
import skbio
@@ -107,6 +108,9 @@ def _fit_predict_knn_cv(
x: pd.DataFrame, y: pd.Series, k: int, cv: int,
random_state: int, n_jobs: int
) -> (pd.Series, pd.Series):
+ if n_jobs == 0:
+ n_jobs = get_available_cores()
+
kf = KFold(n_splits=cv, shuffle=True, random_state=random_state)
# train and test with CV
@@ -291,6 +295,9 @@ def fit_regressor(table: biom.Table,
def predict_base(table, sample_estimator, n_jobs):
+ if n_jobs == 0:
+ n_jobs = get_available_cores()
+
# extract feature data from biom
feature_data = _extract_features(table)
index = table.ids()
=====================================
q2_sample_classifier/plugin_setup.py
=====================================
@@ -10,10 +10,10 @@ import importlib
from qiime2.plugin import (
Int, Str, Float, Range, Bool, Plugin, Metadata, Choices, MetadataColumn,
- Numeric, Categorical, Citations, Visualization, TypeMatch)
+ Numeric, Categorical, Citations, Visualization, TypeMatch, Threads)
from q2_types.feature_table import (
FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Balance,
- PercentileNormalized, Design)
+ PercentileNormalized, Design, Composition)
from q2_types.sample_data import SampleData
from q2_types.feature_data import FeatureData
from q2_types.distance_matrix import DistanceMatrix
@@ -89,7 +89,8 @@ predict_description = (
'contain overlapping features with the feature table used to train '
'the estimator.')
-inputs = {'table': FeatureTable[Frequency]}
+inputs = {'table': FeatureTable[
+ Frequency | RelativeFrequency | PresenceAbsence | Composition]}
input_descriptions = {'table': 'Feature table containing all features that '
'should be used for target prediction.',
@@ -99,7 +100,7 @@ input_descriptions = {'table': 'Feature table containing all features that '
parameters = {
'base': {
'random_state': Int,
- 'n_jobs': Int,
+ 'n_jobs': Threads,
'n_estimators': Int % Range(1, None),
'missing_samples': Str % Choices(['error', 'ignore'])},
'splitter': {
@@ -492,7 +493,7 @@ plugin.visualizers.register_function(
T = TypeMatch([Frequency, RelativeFrequency, PresenceAbsence, Balance,
- PercentileNormalized, Design])
+ PercentileNormalized, Design, Composition])
plugin.methods.register_function(
function=split_table,
inputs={'table': FeatureTable[T]},
=====================================
q2_sample_classifier/tests/test_types_formats_transformers.py
=====================================
@@ -421,7 +421,28 @@ class TestTransformers(SampleEstimatorTestBase):
def read_pipeline(pipeline_filepath):
with tarfile.open(pipeline_filepath) as tar:
dirname = tempfile.mkdtemp()
- tar.extractall(dirname)
+
+ def is_within_directory(directory, target):
+
+ abs_directory = os.path.abspath(directory)
+ abs_target = os.path.abspath(target)
+
+ prefix = os.path.commonprefix([abs_directory, abs_target])
+
+ return prefix == abs_directory
+
+ def safe_extract(tar, path=".", members=None, *,
+ numeric_owner=False):
+
+ for member in tar.getmembers():
+ member_path = os.path.join(path, member.name)
+ if not is_within_directory(path, member_path):
+ raise Exception("Attempted Path Traversal in Tar"
+ "File")
+
+ tar.extractall(path, members, numeric_owner=numeric_owner)
+
+ safe_extract(tar, dirname)
pipeline = joblib.load(os.path.join(dirname,
'sklearn_pipeline.pkl'))
for fn in tar.getnames():
=====================================
q2_sample_classifier/utilities.py
=====================================
@@ -27,6 +27,7 @@ from sklearn.tree import (
)
from sklearn.pipeline import Pipeline
+from qiime2.plugin import get_available_cores
import q2templates
import pandas as pd
import numpy as np
@@ -264,6 +265,9 @@ def nested_cross_validation(table, metadata, cv, random_state, n_jobs,
n_estimators, estimator, stratify,
parameter_tuning, classification, scoring,
missing_samples='error'):
+ if n_jobs == 0:
+ n_jobs = get_available_cores()
+
# extract column name from NumericMetadataColumn
column = metadata.name
@@ -301,6 +305,9 @@ def _fit_estimator(features, targets, estimator, n_estimators=100, step=0.05,
cv=5, random_state=None, n_jobs=1,
optimize_feature_selection=False, parameter_tuning=False,
missing_samples='error', classification=True):
+ if n_jobs == 0:
+ n_jobs = get_available_cores()
+
# extract column name from CategoricalMetadataColumn
column = targets.to_series().name
View it on GitLab: https://salsa.debian.org/med-team/q2-sample-classifier/-/commit/4f644668c22494fee3cfce04fe208b5a3301592c
--
View it on GitLab: https://salsa.debian.org/med-team/q2-sample-classifier/-/commit/4f644668c22494fee3cfce04fe208b5a3301592c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240218/a620050b/attachment-0001.htm>
More information about the debian-med-commit
mailing list