[med-svn] [Git][med-team/q2-dada2][upstream] New upstream version 2022.11.2
Andreas Tille (@tille)
gitlab at salsa.debian.org
Thu Jan 12 20:21:24 GMT 2023
Andreas Tille pushed to branch upstream at Debian Med / q2-dada2
Commits:
7214e78b by Andreas Tille at 2023-01-12T20:57:46+01:00
New upstream version 2022.11.2
- - - - -
16 changed files:
- + .github/workflows/add-to-project-ci.yml
- ci/recipe/meta.yaml
- q2_dada2/_denoise.py
- + q2_dada2/_examples.py
- q2_dada2/_version.py
- q2_dada2/assets/run_dada.R
- q2_dada2/plugin_setup.py
- + q2_dada2/tests/data/mixed_barcodes_and_ids/MANIFEST
- + q2_dada2/tests/data/mixed_barcodes_and_ids/V130_166_L001_R1_001.fastq.gz
- + q2_dada2/tests/data/mixed_barcodes_and_ids/V130_2_167_L001_R1_001.fastq.gz
- + q2_dada2/tests/data/mixed_barcodes_and_ids/V130_2_743_L001_R2_001.fastq.gz
- + q2_dada2/tests/data/mixed_barcodes_and_ids/V130_742_L001_R2_001.fastq.gz
- + q2_dada2/tests/data/mixed_barcodes_and_ids/metadata.yml
- q2_dada2/tests/data/single_feature.tsv
- q2_dada2/tests/test_denoise.py
- setup.py
Changes:
=====================================
.github/workflows/add-to-project-ci.yml
=====================================
@@ -0,0 +1,21 @@
+name: Add new issues and PRs to triage project board
+
+on:
+ issues:
+ types:
+ - opened
+ pull_request_target:
+ types:
+ - opened
+
+jobs:
+ add-to-project:
+ name: Add issue to project
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/add-to-project@v0.3.0
+ with:
+ project-url: https://github.com/orgs/qiime2/projects/36
+ github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
+ labeled: skip-triage
+ label-operator: NOT
=====================================
ci/recipe/meta.yaml
=====================================
@@ -19,7 +19,8 @@ requirements:
run:
- python {{ python }}
- biom-format {{ biom_format }}
- - bioconductor-dada2 1.22.0
+ - bioconductor-dada2
+ - r-base {{ r_base }}
- r-optparse >=1.7.1
# openjdk is not a real dependency, but, r-base has a post-link and post-
# activation hook that calls R CMD javareconf, which pokes around for any
=====================================
q2_dada2/_denoise.py
=====================================
@@ -18,7 +18,7 @@ import pandas as pd
from q2_types.feature_data import DNAIterator
from q2_types.per_sample_sequences import (
- FastqGzFormat, SingleLanePerSampleSingleEndFastqDirFmt,
+ SingleLanePerSampleSingleEndFastqDirFmt,
SingleLanePerSamplePairedEndFastqDirFmt)
@@ -38,7 +38,7 @@ def run_commands(cmds, verbose=True):
def _check_featureless_table(fp):
with open(fp) as fh:
- # There is a header before the feature data
+ # There is a comment line and a header before the feature data
for line_count, _ in zip(range(1, 3), fh):
pass
if line_count < 2:
@@ -98,22 +98,33 @@ def _check_inputs(**kwargs):
% (param, arg, explanation))
-def _filepath_to_sample(fp):
+def _filepath_to_sample_single(fp):
return fp.rsplit('_', 4)[0]
+def _filepath_to_sample_paired(fp):
+ return fp.rsplit('_', 3)[0]
+
+
# Since `denoise-single` and `denoise-pyro` are almost identical, break out
# the bulk of the functionality to this helper util. Typechecking is assumed
# to have occurred in the calling functions, this is primarily for making
# sure that DADA2 is able to do what it needs to do.
-def _denoise_helper(biom_fp, track_fp, hashed_feature_ids):
+def _denoise_helper(biom_fp, track_fp, hashed_feature_ids, paired=False):
_check_featureless_table(biom_fp)
with open(biom_fp) as fh:
table = biom.Table.from_tsv(fh, None, None, None)
+ # If we used denoise_paired the barcode was already stripped from the
+ # filename to force the files to sort by id and pair up properly
+ # see https://github.com/qiime2/q2-dada2/issues/102
+ # and https://github.com/qiime2/q2-dada2/pull/125
+ filepath_to_sample = _filepath_to_sample_paired if paired \
+ else _filepath_to_sample_single
+
df = pd.read_csv(track_fp, sep='\t', index_col=0)
df.index.name = 'sample-id'
- df = df.rename(index=_filepath_to_sample)
+ df = df.rename(index=filepath_to_sample)
PASSED_FILTER = 'percentage of input passed filter'
NON_CHIMERIC = 'percentage of input non-chimeric'
@@ -149,7 +160,7 @@ def _denoise_helper(biom_fp, track_fp, hashed_feature_ids):
# Currently the sample IDs in DADA2 are the file names. We make
# them the sample id part of the filename here.
- sid_map = {id_: _filepath_to_sample(id_)
+ sid_map = {id_: filepath_to_sample(id_)
for id_ in table.ids(axis='sample')}
table.update_ids(sid_map, axis='sample', inplace=True)
# The feature IDs in DADA2 are the sequences themselves.
@@ -274,14 +285,21 @@ def denoise_paired(demultiplexed_seqs: SingleLanePerSamplePairedEndFastqDirFmt,
track_fp = os.path.join(temp_dir, 'track.tsv')
filt_forward = os.path.join(temp_dir, 'filt_f')
filt_reverse = os.path.join(temp_dir, 'filt_r')
+ manifest_df = demultiplexed_seqs.manifest.view(pd.DataFrame)
+
for fp in tmp_forward, tmp_reverse, filt_forward, filt_reverse:
os.mkdir(fp)
- for rp, view in demultiplexed_seqs.sequences.iter_views(FastqGzFormat):
- fp = str(view)
- if 'R1_001.fastq' in rp.name:
- qiime2.util.duplicate(fp, os.path.join(tmp_forward, rp.name))
- elif 'R2_001.fastq' in rp.name:
- qiime2.util.duplicate(fp, os.path.join(tmp_reverse, rp.name))
+ for _, fps in manifest_df.iterrows():
+ fwd_fp = fps['forward']
+ rev_fp = fps['reverse']
+
+ fwd_no_barcode = _remove_barcode(os.path.basename(fps['forward']))
+ rev_no_barcode = _remove_barcode(os.path.basename(fps['reverse']))
+
+ qiime2.util.duplicate(fwd_fp, os.path.join(tmp_forward,
+ fwd_no_barcode))
+ qiime2.util.duplicate(rev_fp, os.path.join(tmp_reverse,
+ rev_no_barcode))
cmd = ['run_dada.R',
'--input_directory', tmp_forward,
@@ -321,7 +339,19 @@ def denoise_paired(demultiplexed_seqs: SingleLanePerSamplePairedEndFastqDirFmt,
raise Exception("An error was encountered while running DADA2"
" in R (return code %d), please inspect stdout"
" and stderr to learn more." % e.returncode)
- return _denoise_helper(biom_fp, track_fp, hashed_feature_ids)
+
+ return _denoise_helper(biom_fp, track_fp, hashed_feature_ids,
+ paired=True)
+
+
+def _remove_barcode(filename):
+ cut = filename.rsplit('_', 3)
+ id_ = cut[0].rsplit('_', 1)[0]
+
+ cut = cut[1:]
+ cut.insert(0, id_)
+
+ return ('_'.join(cut))
def denoise_pyro(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt,
=====================================
q2_dada2/_examples.py
=====================================
@@ -0,0 +1,58 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2016-2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+
+demux_single_url = \
+ 'https://data.qiime2.org/usage-examples/moving-pictures/demux.qza'
+
+demux_paired_url = \
+ 'https://data.qiime2.org/usage-examples/atacama-soils/demux-full.qza'
+
+
+def denoise_single(use):
+ demux_single = use.init_artifact_from_url('demux_single', demux_single_url)
+
+ rep_seqs, table_dada2, denoise_stats = use.action(
+ use.UsageAction('dada2', 'denoise_single'),
+ use.UsageInputs(
+ demultiplexed_seqs=demux_single,
+ trim_left=0,
+ trunc_len=120
+ ),
+ use.UsageOutputNames(
+ representative_sequences='representative_sequences',
+ table='table',
+ denoising_stats='denoising_stats'
+ )
+ )
+
+ rep_seqs.assert_output_type('FeatureData[Sequence]')
+ table_dada2.assert_output_type('FeatureTable[Frequency]')
+ denoise_stats.assert_output_type('SampleData[DADA2Stats]')
+
+
+def denoise_paired(use):
+ demux_paired = use.init_artifact_from_url('demux_paired', demux_paired_url)
+
+ rep_seqs, table_dada2, denoise_stats = use.action(
+ use.UsageAction('dada2', 'denoise_paired'),
+ use.UsageInputs(
+ demultiplexed_seqs=demux_paired,
+ trunc_len_f=150,
+ trunc_len_r=140,
+ ),
+ use.UsageOutputNames(
+ representative_sequences='representative_sequences',
+ table='table',
+ denoising_stats='denoising_stats'
+ )
+ )
+
+ rep_seqs.assert_output_type('FeatureData[Sequence]')
+ table_dada2.assert_output_type('FeatureTable[Frequency]')
+ denoise_stats.assert_output_type('SampleData[DADA2Stats]')
=====================================
q2_dada2/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2022.8.0)"
- git_full = "dc9bdc5b0346353ebf9a63bc63a60bd45a3405b6"
- git_date = "2022-08-23 17:11:59 +0000"
+ git_refnames = " (HEAD -> master, tag: 2022.11.2)"
+ git_full = "0f6c02100a7d2c7775b026bd7204c7c762e0b942"
+ git_date = "2023-01-12 11:23:23 -0700"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_dada2/assets/run_dada.R
=====================================
@@ -128,6 +128,16 @@
# Ex: 32
#
+# error handling -----------------
+options(error = function() {
+ sink(stderr())
+ on.exit(sink(NULL))
+ traceback(3)
+ if (!interactive()) {
+ q(status = 1)
+ }
+})
+
library("optparse")
cat(R.version$version.string, "\n")
@@ -222,7 +232,7 @@ minOverlap <- if(opt$min_overlap=='NULL') NULL else as.integer(opt$min_overlap)
poolMethod <- opt$pooling_method
chimeraMethod <- opt$chimera_method
minParentFold <- if(opt$min_parental_fold=='NULL') NULL else as.numeric(opt$min_parental_fold)
-allowOneOff <-if(opt$allow_one_off=='NULL') NULL else as.logical(opt$allow_one_off)
+allowOneOff <-if(opt$allow_one_off=='NULL') NULL else as.logical(opt$allow_one_off)
nthreads <- if(opt$num_threads=='NULL') NULL else as.integer(opt$num_threads)
nreads.learn <- if(opt$learn_min_reads=='NULL') NULL else as.integer(opt$learn_min_reads)
# The following args are not directly exposed to end users in q2-dada2,
@@ -256,7 +266,7 @@ if(!dir.exists(inp.dir)) {
if(length(unfilts) != length(unfiltsR)) {
errQuit("Different numbers of forward and reverse .fastq.gz files.")
}
-
+
}
}
@@ -311,7 +321,7 @@ if(primer.removed.dir!='NULL'){ #for CCS read analysis
out <- suppressWarnings(filterAndTrim(nop, filts, truncLen = truncLen, trimLeft = trimLeft,
maxEE = maxEE, truncQ = truncQ, rm.phix = FALSE,
multithread = multithread, maxLen = maxLen, minLen = minLen, minQ = 3))
-}else{
+}else{
filts <- file.path(filtered.dir, basename(unfilts))
if(inp.dirR!='NULL'){#for paired read analysis
filtsR <- file.path(filtered.dirR, basename(unfiltsR))
@@ -346,10 +356,10 @@ if(primer.removed.dir!='NULL'){#for CCS read analysis
multithread=multithread, BAND_SIZE=BAND_SIZE))
}else if(inp.dirR!='NULL'){#for paired read analysis
-
+
err <- suppressWarnings(learnErrors(filts, nreads=nreads.learn, multithread=multithread))
errR <- suppressWarnings(learnErrors(filtsR, nreads=nreads.learn, multithread=multithread))
-
+
}else{#for sinlge/pyro read analysis
err <- suppressWarnings(learnErrors(filts, nreads=nreads.learn, multithread=multithread,
HOMOPOLYMER_GAP_PENALTY=HOMOPOLYMER_GAP_PENALTY, BAND_SIZE=BAND_SIZE))
@@ -397,7 +407,7 @@ if(inp.dirR =='NULL'){#for CCS/sinlge/pyro read analysis
ddsR <- vector("list", length(filts))
mergers <- vector("list", length(filts))
cat("3) Denoise samples ")
-
+
for(j in seq(length(filts))) {
drpF <- derepFastq(filts[[j]])
ddsF[[j]] <- dada(drpF, err=err, multithread=multithread, verbose=FALSE)
@@ -431,7 +441,7 @@ if(inp.dirR =='NULL'){#for CCS/sinlge/pyro read analysis
cat("\n")
### \code copied from previous loop through samples in this script
}
-
+
### Now loop through and do merging
for(j in seq(length(filts))) {
drpF <- derepFastq(filts[[j]])
@@ -443,7 +453,7 @@ if(inp.dirR =='NULL'){#for CCS/sinlge/pyro read analysis
cat("\n")
# Make sequence table
seqtab <- makeSequenceTable(mergers)
-
+
}
@@ -493,4 +503,4 @@ write.table(seqtab.nochim, out.path, sep="\t",
row.names=TRUE, col.names=col.names, quote=FALSE)
saveRDS(seqtab.nochim, gsub("tsv", "rds", out.path)) ### TESTING
-q(status=0)
\ No newline at end of file
+q(status=0)
=====================================
q2_dada2/plugin_setup.py
=====================================
@@ -16,6 +16,7 @@ from q2_types.feature_table import FeatureTable, Frequency
import q2_dada2
from q2_dada2 import DADA2Stats, DADA2StatsFormat, DADA2StatsDirFmt
+import q2_dada2._examples as ex
_POOL_OPT = {'pseudo', 'independent'}
_CHIM_OPT = {'pooled', 'consensus', 'none'}
@@ -128,7 +129,10 @@ plugin.methods.register_function(
},
name='Denoise and dereplicate single-end sequences',
description=('This method denoises single-end sequences, dereplicates '
- 'them, and filters chimeras.')
+ 'them, and filters chimeras.'),
+ examples={
+ 'denoise_single': ex.denoise_single
+ }
)
@@ -254,7 +258,10 @@ plugin.methods.register_function(
},
name='Denoise and dereplicate paired-end sequences',
description=('This method denoises paired-end sequences, dereplicates '
- 'them, and filters chimeras.')
+ 'them, and filters chimeras.'),
+ examples={
+ 'denoise_paired': ex.denoise_paired
+ }
)
@@ -304,7 +311,7 @@ plugin.methods.register_function(
'longer than this value. If 0 is provided no reads will '
'be removed based on length.',
'pooling_method': 'The method used to pool samples for denoising. '
- '"independent": Samples are denoised indpendently. '
+ '"independent": Samples are denoised independently. '
'"pseudo": The pseudo-pooling method is used to '
'approximate pooling of samples. In short, samples '
'are denoised independently once, ASVs detected '
=====================================
q2_dada2/tests/data/mixed_barcodes_and_ids/MANIFEST
=====================================
@@ -0,0 +1,5 @@
+sample-id,filename,direction
+V130,V130_166_L001_R1_001.fastq.gz,forward
+V130,V130_742_L001_R2_001.fastq.gz,reverse
+V130_2,V130_2_167_L001_R1_001.fastq.gz,forward
+V130_2,V130_2_743_L001_R2_001.fastq.gz,reverse
=====================================
q2_dada2/tests/data/mixed_barcodes_and_ids/V130_166_L001_R1_001.fastq.gz
=====================================
Binary files /dev/null and b/q2_dada2/tests/data/mixed_barcodes_and_ids/V130_166_L001_R1_001.fastq.gz differ
=====================================
q2_dada2/tests/data/mixed_barcodes_and_ids/V130_2_167_L001_R1_001.fastq.gz
=====================================
Binary files /dev/null and b/q2_dada2/tests/data/mixed_barcodes_and_ids/V130_2_167_L001_R1_001.fastq.gz differ
=====================================
q2_dada2/tests/data/mixed_barcodes_and_ids/V130_2_743_L001_R2_001.fastq.gz
=====================================
Binary files /dev/null and b/q2_dada2/tests/data/mixed_barcodes_and_ids/V130_2_743_L001_R2_001.fastq.gz differ
=====================================
q2_dada2/tests/data/mixed_barcodes_and_ids/V130_742_L001_R2_001.fastq.gz
=====================================
Binary files /dev/null and b/q2_dada2/tests/data/mixed_barcodes_and_ids/V130_742_L001_R2_001.fastq.gz differ
=====================================
q2_dada2/tests/data/mixed_barcodes_and_ids/metadata.yml
=====================================
@@ -0,0 +1 @@
+{phred-offset: 33}
=====================================
q2_dada2/tests/data/single_feature.tsv
=====================================
@@ -1,2 +1,2 @@
#OTU ID L1S208 L1S257 L1S57 L1S76 L2S155 L2S175 L2S309 L2S357 L3S294 L3S313 L4S112 L4S63 L5S155 L5S174 L6S20 L6S68
-b32621bcd86cb99e846d8f6fee7c9ab8 43.0 49.0 75.0 40.0 0.0 5.0 0.0 0.0 0.0 5.0 5.0 0.0 0.0 0.0 0.0 0.0
+b32621bcd86cb99e846d8f6fee7c9ab8 43.0 49.0 75.0 40.0 0.0 5.0 0.0 0.0 0.0 5.0 5.0 0.0 0.0 0.0 0.0 0.0
\ No newline at end of file
=====================================
q2_dada2/tests/test_denoise.py
=====================================
@@ -24,6 +24,13 @@ def _sort_seqs(seqs):
return sorted(list(seqs), key=lambda x: x.metadata['id'])
+class TestExamples(TestPluginBase):
+ package = 'q2_dada2.tests'
+
+ def test_examples(self):
+ self.execute_examples()
+
+
class TestDenoiseSingle(TestPluginBase):
package = 'q2_dada2.tests'
@@ -73,6 +80,14 @@ class TestDenoiseSingle(TestPluginBase):
_sort_seqs(exp_rep_seqs))
self.assertEqual(md, exp_md)
+ def test_mixed_barcodes_and_ids(self):
+ demux_seqs = SingleLanePerSamplePairedEndFastqDirFmt(
+ self.get_data_path('mixed_barcodes_and_ids'), 'r')
+
+ denoise_paired(demux_seqs, 150, 150)
+
+ self.assertTrue(True)
+
def test_all_reads_filtered(self):
with self.assertRaisesRegex(ValueError, 'filter'):
denoise_single(self.demux_seqs, 10000)
=====================================
setup.py
=====================================
@@ -26,6 +26,7 @@ setup(
'q2_dada2': ['citations.bib'],
'q2_dada2.tests': ['data/*',
'data/expected/*',
+ 'data/mixed_barcodes_and_ids/*',
'data/underscore_samples/*',
'data/sample_seqs_single/*',
'data/sample_seqs_ccs/*',
View it on GitLab: https://salsa.debian.org/med-team/q2-dada2/-/commit/7214e78ba98199cce46aab1de4988106c44ff56c
--
View it on GitLab: https://salsa.debian.org/med-team/q2-dada2/-/commit/7214e78ba98199cce46aab1de4988106c44ff56c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230112/4d06c4d8/attachment-0001.htm>
More information about the debian-med-commit
mailing list