[med-svn] [Git][med-team/flexbar][upstream] New upstream version 3.2.0
Andreas Tille
gitlab at salsa.debian.org
Tue May 22 15:21:41 BST 2018
Andreas Tille pushed to branch upstream at Debian Med / flexbar
Commits:
9856de0a by Andreas Tille at 2018-05-22T15:54:34+02:00
New upstream version 3.2.0
- - - - -
13 changed files:
- CMakeLists.txt
- README.md
- src/Flexbar.cpp
- src/Flexbar.h
- src/FlexbarTypes.h
- src/LoadFasta.h
- src/Options.h
- src/PairedAlign.h
- src/PairedInput.h
- src/PairedOutput.h
- src/SeqAlign.h
- src/SeqAlignAlgo.h
- src/SeqInput.h
Changes:
=====================================
CMakeLists.txt
=====================================
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required( VERSION 2.8.2 )
project( FLEXBAR )
-set( SEQAN_APP_VERSION "3.1.0" )
+set( SEQAN_APP_VERSION "3.2.0" )
include_directories( ${FLEXBAR_SOURCE_DIR}/include )
# link_directories( ${FLEXBAR_SOURCE_DIR}/lib )
=====================================
README.md
=====================================
--- a/README.md
+++ b/README.md
@@ -28,20 +28,20 @@ Flexbar source code as well as binaries for Linux and Mac OS can be downloaded o
Make sure that `cmake` is available, as well as development and runtime files of the TBB library 4.0 or later (Intel Threading Building Blocks). For example on Debian systems, install the packages `libtbb-dev` and `libtbb2`. Furthermore, the SeqAn library and a compiler that supports C++14 is required:
* Get SeqAn library version 2.2.0 [here](https://github.com/seqan/seqan/releases/download/seqan-v2.2.0/seqan-library-2.2.0.tar.xz)
-* Download Flexbar 3.1 source code [release](https://github.com/seqan/flexbar/releases)
+* Download Flexbar 3.2 source code [release](https://github.com/seqan/flexbar/releases)
Decompress both files:
- tar xzf flexbar-3.1.0.tar.gz
+ tar xzf flexbar-3.2.0.tar.gz
tar xJf seqan-library-2.2.0.tar.xz
Move SeqAn include folder to Flexbar:
- mv seqan-library-2.2.0/include flexbar-3.1.0
+ mv seqan-library-2.2.0/include flexbar-3.2.0
Use these commands for building:
- cd flexbar-3.1.0
+ cd flexbar-3.2.0
cmake .
make
@@ -77,9 +77,9 @@ In this example, reads that are barcoded on left side are demultiplexed by speci
flexbar -r reads.fq -t target -b brc.fa -be LTAIL -a adp.fa
-The second example shows how to trim compressed reads based on their quality scores in illumina version 1.8 format. Afterwards, provided adapters are removed in right trim-end mode, only if the overlap of adapter and read has at least length five with at most 40% errors.
+The second example shows how to trim compressed reads based on their quality scores in illumina version 1.8 format. Afterwards, provided adapters are removed in right trim-end mode, only if the overlap of adapter and read has at least length 5 with at most 20% errors.
- flexbar -r reads.fq.gz -q TAIL -qf i1.8 -a adp.fa -ao 5 -at 0.4
+ flexbar -r reads.fq.gz -q TAIL -qf i1.8 -a adp.fa -ao 5 -at 0.2
For further examples visit the [manual](https://github.com/seqan/flexbar/wiki) page.
=====================================
src/Flexbar.cpp
=====================================
--- a/src/Flexbar.cpp
+++ b/src/Flexbar.cpp
@@ -2,7 +2,9 @@
Flexbar - flexible barcode and adapter removal
- Version 3.1.0
+ Version 3.2.0
+
+ BSD 3-Clause License
uses SeqAn library release 2.2.0
and TBB library 4.0 or later
@@ -27,8 +29,8 @@ int main(int argc, const char* argv[]){
using namespace std;
using namespace seqan;
- const string version = "3.1";
- const string date = "April 2018";
+ const string version = "3.2";
+ const string date = "May 2018";
ArgumentParser parser("flexbar");
=====================================
src/Flexbar.h
=====================================
--- a/src/Flexbar.h
+++ b/src/Flexbar.h
@@ -98,17 +98,18 @@ void loadAdapters(Options &o, const bool secondSet, const bool useAdapterFile){
}
}
else{
- TBar bar;
- bar.id = "cmdline";
- bar.seq = o.adapterSeq;
- o.adapters.push_back(bar);
-
- if(o.revCompAdapter){
+ if(o.rcMode == RCOFF || o.rcMode == RCON){
+ TBar bar;
+ bar.id = "cmdline";
+ bar.seq = o.adapterSeq;
+ o.adapters.push_back(bar);
+ }
+ if(o.rcMode == RCON || o.rcMode == RCONLY){
TSeqStr adapterSeqRC = o.adapterSeq;
seqan::reverseComplement(adapterSeqRC);
TBar barRC;
- barRC.id = "cmdline revcomp";
+ barRC.id = "cmdline_rc";
barRC.seq = adapterSeqRC;
o.adapters.push_back(barRC);
}
=====================================
src/FlexbarTypes.h
=====================================
--- a/src/FlexbarTypes.h
+++ b/src/FlexbarTypes.h
@@ -11,17 +11,21 @@ class SeqRead {
TSeqStr seq;
TString id, qual, umi;
+ bool rmAdapter, rmAdapterRC;
+
SeqRead(TSeqStr& sequence, TString& seqID) :
seq(sequence),
id(seqID),
- umi(""){
+ rmAdapter(false),
+ rmAdapterRC(false){
}
SeqRead(TSeqStr& sequence, TString& seqID, TString& quality) :
seq(sequence),
id(seqID),
qual(quality),
- umi(""){
+ rmAdapter(false),
+ rmAdapterRC(false){
}
};
@@ -60,7 +64,7 @@ struct AlignResults{
int overlapLength, queryLength, tailLength;
float allowedErrors;
- TSeqStr randTag;
+ TSeqStr umiTag;
std::string alString;
AlignResults(){}
@@ -116,21 +120,35 @@ namespace flexbar{
FString id;
FSeqStr seq;
+ bool rcAdapter;
tbb::atomic<unsigned long> rmOverlap, rmFull;
TBar() :
rmOverlap(0),
- rmFull(0){
+ rmFull(0),
+ rcAdapter(false){
}
};
- enum ComputeCycle {
- PRELOAD,
- COMPUTE,
- RESULTS
- };
+ enum RevCompMode {
+ RCOFF,
+ RCON,
+ RCONLY
+ };
+
+ enum AlignmentMode {
+ ALIGNALL,
+ ALIGNRCOFF,
+ ALIGNRC
+ };
+
+ enum ComputeCycle {
+ PRELOAD,
+ COMPUTE,
+ RESULTS
+ };
enum LogAlign {
NONE,
=====================================
src/LoadFasta.h
=====================================
--- a/src/LoadFasta.h
+++ b/src/LoadFasta.h
@@ -12,16 +12,16 @@ private:
std::ostream *out;
tbb::concurrent_vector<flexbar::TBar> bars;
- bool m_revComp, m_isAdapter;
+ const bool m_isAdapter;
+ const flexbar::RevCompMode m_rcMode;
public:
LoadFasta(const Options &o, const bool isAdapter) :
out(o.out),
+ m_rcMode(o.rcMode),
m_isAdapter(isAdapter){
-
- m_revComp = o.revCompAdapter && isAdapter;
};
@@ -64,21 +64,24 @@ public:
}
else idMap[ids[i]] = 1;
- TBar bar;
- bar.id = ids[i];
- bar.seq = seqs[i];
- bars.push_back(bar);
+ if(! m_isAdapter || m_rcMode == RCOFF || m_rcMode == RCON){
+ TBar bar;
+ bar.id = ids[i];
+ bar.seq = seqs[i];
+ bars.push_back(bar);
+ }
- if(m_revComp){
- TSeqStr seq = seqs[i];
+ if(m_isAdapter && (m_rcMode == RCON || m_rcMode == RCONLY)){
TString id = ids[i];
+ TSeqStr seq = seqs[i];
- append(id, " revcomp");
+ append(id, "_rc");
seqan::reverseComplement(seq);
TBar barRC;
- barRC.id = id;
- barRC.seq = seq;
+ barRC.id = id;
+ barRC.seq = seq;
+ barRC.rcAdapter = true;
bars.push_back(barRC);
}
}
=====================================
src/Options.h
=====================================
--- a/src/Options.h
+++ b/src/Options.h
@@ -17,20 +17,21 @@ struct Options{
std::string readsFile, readsFile2, barReadsFile;
std::string barcodeFile, adapterFile, barcode2File, adapter2File;
std::string adapterSeq, targetName, logAlignStr, outCompression;
- std::string trimLeftNucs, trimRightNucs;
+ std::string htrimLeft, htrimRight;
- bool isPaired, useAdapterFile, useNumberTag, useRemovalTag, randTag, logStdout;
+ bool isPaired, useAdapterFile, useNumberTag, useRemovalTag, umiTags, logStdout;
bool switch2Fasta, writeUnassigned, writeSingleReads, writeSingleReadsP, writeLengthDist;
- bool useStdin, useStdout, relaxRegion, revCompAdapter, qtrimPostRm;
+ bool useStdin, useStdout, relaxRegion, useRcTrimEnd, qtrimPostRm;
+ bool interleavedInput, htrimAdapterRm, htrimMaxFirstOnly;
int cutLen_begin, cutLen_end, cutLen_read, a_tail_len, b_tail_len;
- int qtrimThresh, qtrimWinSize, a_overhang, hpsMinLength;
+ int qtrimThresh, qtrimWinSize, a_overhang, htrimMinLength, htrimMaxLength, a_cycles;
int maxUncalled, min_readLen, a_min_overlap, b_min_overlap, nThreads, bundleSize;
int match, mismatch, gapCost, b_match, b_mismatch, b_gapCost;
- float a_errorRate, b_errorRate;
+ float a_errorRate, b_errorRate, h_errorRate;
- flexbar::TrimEnd end, b_end;
+ flexbar::TrimEnd a_end, b_end, arc_end;
flexbar::FileFormat format;
flexbar::QualityType qual;
flexbar::QualTrimType qTrim;
@@ -39,6 +40,7 @@ struct Options{
flexbar::RunType runType;
flexbar::BarcodeDetect barDetect;
flexbar::AdapterRemoval adapRm;
+ flexbar::RevCompMode rcMode;
tbb::concurrent_vector<flexbar::TBar> barcodes, adapters, barcodes2, adapters2;
@@ -54,8 +56,8 @@ struct Options{
barcode2File = "";
adapter2File = "";
outCompression = "";
- trimLeftNucs = "";
- trimRightNucs = "";
+ htrimLeft = "";
+ htrimRight = "";
isPaired = false;
useAdapterFile = false;
@@ -67,23 +69,25 @@ struct Options{
writeLengthDist = false;
switch2Fasta = false;
logStdout = false;
- randTag = false;
+ umiTags = false;
+ interleavedInput = false;
useStdin = false;
useStdout = false;
relaxRegion = false;
- revCompAdapter = false;
+ useRcTrimEnd = false;
qtrimPostRm = false;
-
- cutLen_begin = 0;
- cutLen_end = 0;
- cutLen_read = 0;
- qtrimThresh = 0;
- qtrimWinSize = 0;
- a_tail_len = 0;
- b_tail_len = 0;
- b_min_overlap = 0;
-
- a_errorRate = 0.1;
+ htrimAdapterRm = false;
+ htrimMaxFirstOnly = false;
+
+ cutLen_begin = 0;
+ cutLen_end = 0;
+ cutLen_read = 0;
+ qtrimThresh = 0;
+ qtrimWinSize = 0;
+ a_tail_len = 0;
+ b_tail_len = 0;
+ b_min_overlap = 0;
+ htrimMaxLength = 0;
format = flexbar::FASTA;
qual = flexbar::SANGER;
@@ -92,6 +96,7 @@ struct Options{
cmprsType = flexbar::UNCOMPRESSED;
barDetect = flexbar::BOFF;
adapRm = flexbar::AOFF;
+ rcMode = flexbar::RCOFF;
}
};
@@ -117,7 +122,7 @@ const std::string getFlexbarBanner(const seqan::CharString version){
const std::string getFlexbarCitation(){
- return "Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich:\nFlexbar - flexible barcode and adapter processing for next-generation\nsequencing platforms. Biology 2012, 1(3):895-905.\n";
+ return "Johannes T. Roehr, Christoph Dieterich, Knut Reinert:\nFlexbar 3.0 - SIMD and multicore parallelization. Bioinformatics 2017.\n\nMatthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich:\nFlexbar - flexible barcode and adapter processing for next-generation\nsequencing platforms. Biology 2012.\n";
}
@@ -137,10 +142,9 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
// setCitation(parser, "\n\n" + getFlexbarCitation());
- // setAppName(parser, "");
- // setShortCopyright(parser, "");
+ // setAppName(parser, "Flexbar");
+ // setShortCopyright(parser, "BSD 3-Clause License");
// setLongCopyright(parser, "");
- // ARG::OUTPUTPREFIX
setShortDescription(parser, "flexible barcode and adapter removal");
@@ -148,23 +152,24 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("hm", "man-help", "Print advanced options as man document."));
addOption(parser, ArgParseOption("v", "versions", "Print Flexbar and SeqAn version numbers."));
- addOption(parser, ArgParseOption("c", "cite", "Show program reference for citation."));
+ addOption(parser, ArgParseOption("c", "cite", "Show program references for citation."));
addSection(parser, "Basic options");
addOption(parser, ArgParseOption("n", "threads", "Number of threads to employ.", ARG::INTEGER));
addOption(parser, ArgParseOption("N", "bundle", "Number of read pairs per thread.", ARG::INTEGER));
- addOption(parser, ArgParseOption("t", "target", "Prefix for output file names or paths.", ARG::STRING));
+ addOption(parser, ArgParseOption("t", "target", "Prefix for output file names or paths.", ARG::OUTPUT_PREFIX));
addOption(parser, ArgParseOption("r", "reads", "Fasta/q file or stdin (-) with reads that may contain barcodes.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("p", "reads2", "Second input file of paired reads, gz and bz2 files supported.", ARG::INPUT_FILE));
+ addOption(parser, ArgParseOption("i", "interleaved", "Interleaved format for first input set with paired reads."));
addSection(parser, "Barcode detection");
addOption(parser, ArgParseOption("b", "barcodes", "Fasta file with barcodes for demultiplexing, may contain N.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("b2", "barcodes2", "Additional barcodes file for second read set in paired mode.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("br", "barcode-reads", "Fasta/q file containing separate barcode reads for detection.", ARG::INPUT_FILE));
- addOption(parser, ArgParseOption("be", "barcode-trim-end", "Type of detection, see section trim-end modes.", ARG::STRING));
- addOption(parser, ArgParseOption("bn", "barcode-tail-length", "Region size in tail trim-end modes. Default: barcode length.", ARG::INTEGER));
addOption(parser, ArgParseOption("bo", "barcode-min-overlap", "Minimum overlap of barcode and read. Default: barcode length.", ARG::INTEGER));
addOption(parser, ArgParseOption("bt", "barcode-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
+ addOption(parser, ArgParseOption("be", "barcode-trim-end", "Type of detection, see section trim-end modes.", ARG::STRING));
+ addOption(parser, ArgParseOption("bn", "barcode-tail-length", "Region size in tail trim-end modes. Default: barcode length.", ARG::INTEGER));
addOption(parser, ArgParseOption("bk", "barcode-keep", "Keep barcodes within reads instead of removal."));
addOption(parser, ArgParseOption("bu", "barcode-unassigned", "Include unassigned reads in output generation."));
addOption(parser, ArgParseOption("bm", "barcode-match", "Alignment match score.", ARG::INTEGER));
@@ -175,14 +180,17 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("a", "adapters", "Fasta file with adapters for removal that may contain N.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("a2", "adapters2", "File with extra adapters for second read set in paired mode.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("as", "adapter-seq", "Single adapter sequence as alternative to adapters option.", ARG::STRING));
- addOption(parser, ArgParseOption("ar", "adapter-read-set", "Consider only single read set for adapters.", ARG::STRING));
- addOption(parser, ArgParseOption("ac", "adapter-revcomp", "Consider also reverse complement of each adapter in search."));
+ // addOption(parser, ArgParseOption("ap", "adapter-pair-overlap", "Overlap detection for paired reads.", ARG::STRING));
+ addOption(parser, ArgParseOption("ao", "adapter-min-overlap", "Minimum overlap of adapter and read for removal.", ARG::INTEGER));
+ addOption(parser, ArgParseOption("at", "adapter-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
addOption(parser, ArgParseOption("ae", "adapter-trim-end", "Type of removal, see section trim-end modes.", ARG::STRING));
addOption(parser, ArgParseOption("an", "adapter-tail-length", "Region size for tail trim-end modes. Default: adapter length.", ARG::INTEGER));
// addOption(parser, ArgParseOption("ah", "adapter-overhang", "Overhang at read ends in right and left modes.", ARG::INTEGER));
- addOption(parser, ArgParseOption("ad", "adapter-relaxed", "Skip restriction to pass read ends in right and left modes."));
- addOption(parser, ArgParseOption("ao", "adapter-min-overlap", "Minimum overlap of adapter and read for removal.", ARG::INTEGER));
- addOption(parser, ArgParseOption("at", "adapter-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
+ addOption(parser, ArgParseOption("ax", "adapter-relaxed", "Skip restriction to pass read ends in right and left modes."));
+ addOption(parser, ArgParseOption("ac", "adapter-revcomp", "Include reverse complements of adapters.", ARG::STRING));
+ addOption(parser, ArgParseOption("ad", "adapter-revcomp-end", "Use different trim-end for reverse complement of adapters.", ARG::STRING));
+ addOption(parser, ArgParseOption("ar", "adapter-read-set", "Consider only single read set for adapters.", ARG::STRING));
+ addOption(parser, ArgParseOption("ay", "adapter-cycles", "Number of adapter removal cycles.", ARG::INTEGER));
addOption(parser, ArgParseOption("am", "adapter-match", "Alignment match score.", ARG::INTEGER));
addOption(parser, ArgParseOption("ai", "adapter-mismatch", "Alignment mismatch score.", ARG::INTEGER));
addOption(parser, ArgParseOption("ag", "adapter-gap", "Alignment gap score.", ARG::INTEGER));
@@ -191,9 +199,6 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("u", "max-uncalled", "Allowed uncalled bases N for each read.", ARG::INTEGER));
addOption(parser, ArgParseOption("x", "pre-trim-left", "Trim given number of bases on 5' read end before detection.", ARG::INTEGER));
addOption(parser, ArgParseOption("y", "pre-trim-right", "Trim specified number of bases on 3' end prior to detection.", ARG::INTEGER));
- addOption(parser, ArgParseOption("X", "post-trim-left-hps", "Trim certain homopolymers on the left read end after removal.", ARG::STRING));
- addOption(parser, ArgParseOption("Y", "post-trim-right-hps", "Trim certain homopolymers on the right read end after removal.", ARG::STRING));
- addOption(parser, ArgParseOption("Z", "post-trim-hps-length", "Minimum length of homopolymers at read ends.", ARG::INTEGER));
addOption(parser, ArgParseOption("k", "post-trim-length", "Trim to specified read length from 3' end after removal.", ARG::INTEGER));
addOption(parser, ArgParseOption("m", "min-read-length", "Minimum read length to remain after removal.", ARG::INTEGER));
@@ -201,10 +206,18 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("q", "qtrim", "Quality-based trimming mode.", ARG::STRING));
addOption(parser, ArgParseOption("qf", "qtrim-format", "Quality format.", ARG::STRING));
addOption(parser, ArgParseOption("qt", "qtrim-threshold", "Minimum quality as threshold for trimming.", ARG::INTEGER));
- // addOption(parser, ArgParseOption("qm", "qtrim-win-mean", "Different threshold for min mean quality of window.", ARG::INTEGER));
addOption(parser, ArgParseOption("qw", "qtrim-win-size", "Region size for sliding window approach.", ARG::INTEGER));
addOption(parser, ArgParseOption("qa", "qtrim-post-removal", "Perform quality-based trimming after removal steps."));
+ addSection(parser, "Trimming of homopolymers");
+ addOption(parser, ArgParseOption("X", "htrim-left", "Trim certain homopolymers on left read end after removal.", ARG::STRING));
+ addOption(parser, ArgParseOption("Y", "htrim-right", "Trim certain homopolymers on right read end after removal.", ARG::STRING));
+ addOption(parser, ArgParseOption("M", "htrim-min-length", "Minimum length of homopolymers at read ends.", ARG::INTEGER));
+ addOption(parser, ArgParseOption("L", "htrim-max-length", "Maximum length of homopolymers at read ends.", ARG::INTEGER));
+ addOption(parser, ArgParseOption("F", "htrim-max-first", "Max length of homopolymers only for first type."));
+ addOption(parser, ArgParseOption("T", "htrim-error-rate", "Error rate threshold for mismatches.", ARG::DOUBLE));
+ addOption(parser, ArgParseOption("A", "htrim-adapter", "Trim only in case of adapter removal on same side."));
+
addSection(parser, "Output selection");
addOption(parser, ArgParseOption("f", "fasta-output", "Prefer non-quality format fasta for output."));
addOption(parser, ArgParseOption("z", "zip-output", "Direct compression of output files.", ARG::STRING));
@@ -218,7 +231,7 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addOption(parser, ArgParseOption("o", "stdout-log", "Write statistics to console instead of target log file."));
addOption(parser, ArgParseOption("g", "removal-tags", "Tag reads that are subject to adapter or barcode removal."));
addOption(parser, ArgParseOption("e", "number-tags", "Replace read tags by ascending number to save space."));
- addOption(parser, ArgParseOption("i", "umi-tags", "Capture UMIs in reads at barcode or adapter N positions."));
+ addOption(parser, ArgParseOption("d", "umi-tags", "Capture UMIs in reads at barcode or adapter N positions."));
hideOption(parser, "version");
@@ -232,23 +245,27 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setAdvanced(parser, "barcode-gap");
setAdvanced(parser, "adapter-revcomp");
+ setAdvanced(parser, "adapter-revcomp-end");
setAdvanced(parser, "adapter-tail-length");
// setAdvanced(parser, "adapter-overhang");
setAdvanced(parser, "adapter-relaxed");
setAdvanced(parser, "adapter-read-set");
+ setAdvanced(parser, "adapter-cycles");
setAdvanced(parser, "adapter-match");
setAdvanced(parser, "adapter-mismatch");
setAdvanced(parser, "adapter-gap");
- setAdvanced(parser, "post-trim-left-hps");
- setAdvanced(parser, "post-trim-right-hps");
- setAdvanced(parser, "post-trim-hps-length");
setAdvanced(parser, "post-trim-length");
setAdvanced(parser, "qtrim-win-size");
setAdvanced(parser, "qtrim-post-removal");
+ setAdvanced(parser, "htrim-left");
+ setAdvanced(parser, "htrim-max-length");
+ setAdvanced(parser, "htrim-max-first");
+ setAdvanced(parser, "htrim-adapter");
setAdvanced(parser, "man-help");
setAdvanced(parser, "bundle");
+ setAdvanced(parser, "interleaved");
setAdvanced(parser, "stdout-reads");
setAdvanced(parser, "length-dist");
setAdvanced(parser, "single-reads-paired");
@@ -294,7 +311,9 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setValidValues(parser, "qtrim-format", "sanger solexa i1.3 i1.5 i1.8");
setValidValues(parser, "align-log", "ALL MOD TAB");
setValidValues(parser, "zip-output", "GZ BZ2");
+ // setValidValues(parser, "adapter-pair-overlap", "ON ONLY");
setValidValues(parser, "adapter-read-set", "1 2");
+ setValidValues(parser, "adapter-revcomp", "ON ONLY");
setDefaultValue(parser, "target", "flexbarOut");
setDefaultValue(parser, "threads", "1");
@@ -302,7 +321,6 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setDefaultValue(parser, "max-uncalled", "0");
setDefaultValue(parser, "min-read-length", "18");
- setDefaultValue(parser, "post-trim-hps-length", "3");
setDefaultValue(parser, "barcode-trim-end", "LTAIL");
setDefaultValue(parser, "barcode-error-rate", "0.0");
@@ -313,6 +331,7 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setDefaultValue(parser, "adapter-trim-end", "RIGHT");
setDefaultValue(parser, "adapter-min-overlap", "3");
setDefaultValue(parser, "adapter-error-rate", "0.1");
+ setDefaultValue(parser, "adapter-cycles", "1");
// setDefaultValue(parser, "adapter-overhang", "0");
setDefaultValue(parser, "adapter-match", "1");
setDefaultValue(parser, "adapter-mismatch", "-1");
@@ -321,6 +340,8 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
setDefaultValue(parser, "qtrim-threshold", "20");
setDefaultValue(parser, "qtrim-win-size", "5");
+ setDefaultValue(parser, "htrim-min-length", "3");
+ setDefaultValue(parser, "htrim-error-rate", "0.1");
addTextSection(parser, "TRIM-END MODES");
addText(parser._toolDoc, "\\fBANY:\\fP longer side of read remains after removal of overlap", false);
@@ -331,7 +352,7 @@ void defineOptions(seqan::ArgumentParser &parser, const std::string version, con
addTextSection(parser, "EXAMPLES");
addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq \\fB-t\\fP target \\fB-b\\fP brc.fa \\fB-be\\fP LTAIL \\fB-a\\fP adp.fa", false);
- addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq.gz \\fB-q\\fP TAIL \\fB-qf\\fP i1.8 \\fB-a\\fP adp.fa \\fB-ao\\fP 5 \\fB-at\\fP 0.4");
+ addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq.gz \\fB-q\\fP TAIL \\fB-qf\\fP i1.8 \\fB-a\\fP adp.fa \\fB-ao\\fP 5 \\fB-at\\fP 0.2");
}
@@ -458,9 +479,19 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
getOptionValue(o.nThreads, parser, "threads");
*out << "Number of threads: " << o.nThreads << endl;
+ if(o.nThreads < 1){
+ cerr << "\n" << "Number of threads should be 1 at least.\n" << endl;
+ exit(1);
+ }
+
getOptionValue(o.bundleSize, parser, "bundle");
*out << "Bundled fragments: " << o.bundleSize << endl << endl;
+ if(o.bundleSize < 1){
+ cerr << "\n" << "Bundle size should be 1 at least.\n" << endl;
+ exit(1);
+ }
+
getOptionValue(o.targetName, parser, "target");
*out << "Target name: " << o.targetName << endl;
@@ -481,7 +512,20 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
o.runType = SINGLE;
+
+ if(isSet(parser, "interleaved")){
+ *out << "Interleaved reads: on (paired run)" << endl;
+ o.runType = PAIRED;
+ o.isPaired = true;
+ o.interleavedInput = true;
+ }
+
if(isSet(parser, "reads2")){
+
+ if(o.interleavedInput){
+ cerr << "\n" << "Please specify either interleaved reads or second reads file.\n" << endl;
+ exit(1);
+ }
getOptionValue(o.readsFile2, parser, "reads2");
*out << "Reads file 2: " << o.readsFile2 << " (paired run)" << endl;
o.runType = PAIRED;
@@ -491,7 +535,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
checkInputType(o.readsFile2, fformat, false);
if(o.format != fformat){
- cerr << "\n\n" << "First and second reads file do not have same format.\n" << endl;
+ cerr << "\n" << "First and second reads file do not have same format.\n" << endl;
exit(1);
}
}
@@ -509,7 +553,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
checkInputType(o.barReadsFile, fformat, false);
if(o.format != fformat){
- cerr << "\n\n" << "Barcode reads file does not have same format as reads.\n" << endl;
+ cerr << "\n" << "Barcode reads file does not have same format as reads.\n" << endl;
exit(1);
}
@@ -570,19 +614,6 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
*out << "pre-trim-right: " << o.cutLen_end << endl;
}
- if(isSet(parser, "post-trim-left-hps")){
- getOptionValue(o.trimLeftNucs, parser, "post-trim-left-hps");
- *out << "post-trim-left-hps: " << o.trimLeftNucs << endl;
- }
-
- if(isSet(parser, "post-trim-right-hps")){
- getOptionValue(o.trimRightNucs, parser, "post-trim-right-hps");
- *out << "post-trim-right-hps: " << o.trimRightNucs << endl;
- }
-
- getOptionValue(o.hpsMinLength, parser, "post-trim-hps-length");
- *out << "post-trim-hps-length: " << o.hpsMinLength << endl;
-
if(isSet(parser, "post-trim-length")){
getOptionValue(o.cutLen_read, parser, "post-trim-length");
*out << "post-trim-length: " << o.cutLen_read << endl;
@@ -596,6 +627,11 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
exit(1);
}
+ if(o.cutLen_read != 0 && o.cutLen_read < o.min_readLen){
+ o.cutLen_read = 0;
+ cerr << "\nOption post-trim-length omitted, as it is shorter than min read length.\n" << endl;
+ }
+
// quality-based trimming
@@ -624,7 +660,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
*out << "qtrim-format: " << quality << endl;
}
else{
- cerr << "\n\n" << "Specify qtrim-format for quality-based trimming.\n" << endl;
+ cerr << "\n" << "Specify qtrim-format for quality-based trimming.\n" << endl;
exit(1);
}
@@ -658,6 +694,42 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
}
+ // trimming of homopolymers
+
+ if(isSet(parser, "htrim-left") || isSet(parser, "htrim-right")){
+
+ if(isSet(parser, "htrim-left")){
+ getOptionValue(o.htrimLeft, parser, "htrim-left");
+ *out << "htrim-left: " << o.htrimLeft << endl;
+ }
+
+ if(isSet(parser, "htrim-right")){
+ getOptionValue(o.htrimRight, parser, "htrim-right");
+ *out << "htrim-right: " << o.htrimRight << endl;
+ }
+
+ if(isSet(parser, "htrim-max-length")){
+ getOptionValue(o.htrimMaxLength, parser, "htrim-max-length");
+ *out << "htrim-max-length: " << o.htrimMaxLength << endl;
+
+ if(isSet(parser, "htrim-max-first")){
+ *out << "htrim-max-first: on" << endl;
+ o.htrimMaxFirstOnly = true;
+ }
+ }
+
+ getOptionValue(o.htrimMinLength, parser, "htrim-min-length");
+ *out << "htrim-min-length: " << o.htrimMinLength << endl;
+
+ getOptionValue(o.h_errorRate, parser, "htrim-error-rate");
+ *out << "htrim-error-rate: " << o.h_errorRate << endl;
+
+ if(isSet(parser, "htrim-adapter")){
+ *out << "htrim-adapter: on" << endl;
+ o.htrimAdapterRm = true;
+ }
+ }
+
// output, logging and tagging options
if(isSet(parser, "align-log")){
@@ -692,7 +764,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
if(isSet(parser, "length-dist")) o.writeLengthDist = true;
if(isSet(parser, "number-tags")) o.useNumberTag = true;
if(isSet(parser, "removal-tags")) o.useRemovalTag = true;
- if(isSet(parser, "umi-tags")) o.randTag = true;
+ if(isSet(parser, "umi-tags")) o.umiTags = true;
*out << endl;
@@ -710,7 +782,7 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
else if(b_trim_end == "LTAIL") o.b_end = LTAIL;
else if(b_trim_end == "RTAIL") o.b_end = RTAIL;
else{
- cerr << "Specified barcode trim-end is unknown!\n" << endl;
+ cerr << "\nSpecified barcode trim-end is unknown.\n" << endl;
exit(1);
}
*out << "barcode-trim-end: " << b_trim_end << endl;
@@ -724,6 +796,11 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
if(isSet(parser, "barcode-min-overlap")){
getOptionValue(o.b_min_overlap, parser, "barcode-min-overlap");
*out << "barcode-min-overlap: " << o.b_min_overlap << endl;
+
+ if(o.b_min_overlap < 1){
+ cerr << "\nBarcode min-overlap should be 1 at least.\n" << endl;
+ exit(1);
+ }
}
getOptionValue(o.b_errorRate, parser, "barcode-error-rate");
@@ -763,13 +840,13 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
string a_trim_end;
getOptionValue(a_trim_end, parser, "adapter-trim-end");
- if (a_trim_end == "LEFT") o.end = LEFT;
- else if(a_trim_end == "RIGHT") o.end = RIGHT;
- else if(a_trim_end == "ANY") o.end = ANY;
- else if(a_trim_end == "LTAIL") o.end = LTAIL;
- else if(a_trim_end == "RTAIL") o.end = RTAIL;
+ if (a_trim_end == "LEFT") o.a_end = LEFT;
+ else if(a_trim_end == "RIGHT") o.a_end = RIGHT;
+ else if(a_trim_end == "ANY") o.a_end = ANY;
+ else if(a_trim_end == "LTAIL") o.a_end = LTAIL;
+ else if(a_trim_end == "RTAIL") o.a_end = RTAIL;
else {
- cerr << "Specified adapter trim-end is unknown!\n" << endl;
+ cerr << "\nSpecified adapter trim-end is unknown.\n" << endl;
exit(1);
}
*out << "adapter-trim-end: " << a_trim_end << endl;
@@ -781,12 +858,38 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
}
if(isSet(parser, "adapter-revcomp")){
- *out << "adapter-revcomp: yes" << endl;
- o.revCompAdapter = true;
+
+ string rcModeStr;
+ getOptionValue(rcModeStr, parser, "adapter-revcomp");
+ *out << "adapter-revcomp: " << rcModeStr << endl;
+
+ if (rcModeStr == "ON") o.rcMode = RCON;
+ else if(rcModeStr == "ONLY") o.rcMode = RCONLY;
+
+ if(isSet(parser, "adapter-revcomp-end") && o.rcMode == RCON){
+
+ string arc_trim_end;
+ getOptionValue(arc_trim_end, parser, "adapter-revcomp-end");
+
+ if (arc_trim_end == "LEFT") o.arc_end = LEFT;
+ else if(arc_trim_end == "RIGHT") o.arc_end = RIGHT;
+ else if(arc_trim_end == "ANY") o.arc_end = ANY;
+ else if(arc_trim_end == "LTAIL") o.arc_end = LTAIL;
+ else if(arc_trim_end == "RTAIL") o.arc_end = RTAIL;
+ else {
+ cerr << "\nSpecified reverse complement adapter trim-end is unknown.\n" << endl;
+ exit(1);
+ }
+
+ if(o.arc_end != o.a_end){
+ *out << "adapter-revcomp-end: " << arc_trim_end << endl;
+ o.useRcTrimEnd = true;
+ }
+ }
}
if(isSet(parser, "adapter-relaxed")){
- *out << "adapter-relaxed: yes" << endl;
+ *out << "adapter-relaxed: on" << endl;
o.relaxRegion = true;
}
@@ -802,6 +905,11 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
getOptionValue(o.a_min_overlap, parser, "adapter-min-overlap");
*out << "adapter-min-overlap: " << o.a_min_overlap << endl;
+ if(o.a_min_overlap < 1){
+ cerr << "\nAdapter min-overlap should be 1 at least.\n" << endl;
+ exit(1);
+ }
+
getOptionValue(o.a_errorRate, parser, "adapter-error-rate");
*out << "adapter-error-rate: " << o.a_errorRate << endl;
@@ -810,6 +918,14 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
exit(1);
}
+ getOptionValue(o.a_cycles, parser, "adapter-cycles");
+ if(o.a_cycles > 1) *out << "adapter-cycles: " << o.a_cycles << endl;
+
+ if(o.a_cycles < 1){
+ cerr << "\nNumber of adapter removal cycles should be 1 at least.\n" << endl;
+ exit(1);
+ }
+
// getOptionValue(o.a_overhang, parser, "adapter-overhang");
// *out << "adapter-overhang: " << o.a_overhang << endl;
@@ -830,14 +946,6 @@ void loadOptions(Options &o, seqan::ArgumentParser &parser){
*out << o.gapCost << "\n" << endl;
}
-
- // option compatibility tests
-
- if(o.cutLen_read != 0 && o.cutLen_read < o.min_readLen){
- o.cutLen_read = 0;
- cerr << "\nOption post-trim-length omitted, as it is shorter than min read length.\n" << endl;
- }
-
}
=====================================
src/PairedAlign.h
=====================================
--- a/src/PairedAlign.h
+++ b/src/PairedAlign.h
@@ -12,12 +12,22 @@ class PairedAlign : public tbb::filter {
private:
- const bool m_writeUnassigned, m_twoBarcodes;
+ const bool m_writeUnassigned, m_twoBarcodes, m_umiTags, m_useRcTrimEnd;
+ const bool m_htrim, m_htrimAdapterRm, m_htrimMaxFirstOnly;
+ const std::string m_htrimLeft, m_htrimRight;
+
+ const int m_htrimMinLength, m_htrimMaxLength;
+ const float m_htrimErrorRate;
+
+ const unsigned int m_arTimes;
+
+ const flexbar::FileFormat m_format;
const flexbar::LogAlign m_log;
const flexbar::RunType m_runType;
const flexbar::BarcodeDetect m_barType;
const flexbar::AdapterRemoval m_adapRem;
+ const flexbar::TrimEnd m_aTrimEnd, m_arcTrimEnd, m_bTrimEnd;
tbb::atomic<unsigned long> m_unassigned;
tbb::concurrent_vector<flexbar::TBar> *m_adapters, *m_adapters2;
@@ -33,11 +43,26 @@ public:
PairedAlign(Options &o) :
filter(parallel),
+ m_format(o.format),
m_log(o.logAlign),
m_runType(o.runType),
m_barType(o.barDetect),
m_adapRem(o.adapRm),
+ m_aTrimEnd(o.a_end),
+ m_arcTrimEnd(o.arc_end),
+ m_bTrimEnd(o.b_end),
+ m_arTimes(o.a_cycles),
+ m_umiTags(o.umiTags),
+ m_useRcTrimEnd(o.useRcTrimEnd),
m_writeUnassigned(o.writeUnassigned),
+ m_htrimLeft(o.htrimLeft),
+ m_htrimRight(o.htrimRight),
+ m_htrimMinLength(o.htrimMinLength),
+ m_htrimMaxLength(o.htrimMaxLength),
+ m_htrimMaxFirstOnly(o.htrimMaxFirstOnly),
+ m_htrimErrorRate(o.h_errorRate),
+ m_htrimAdapterRm(o.htrimAdapterRm),
+ m_htrim(o.htrimLeft != "" || o.htrimRight != ""),
m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2),
out(o.out),
m_unassigned(0){
@@ -47,11 +72,11 @@ public:
m_barcodes2 = &o.barcodes2;
m_adapters2 = &o.adapters2;
- m_b1 = new TSeqAlign(m_barcodes, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true);
- m_a1 = new TSeqAlign(m_adapters, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false);
+ m_b1 = new TSeqAlign(m_barcodes, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, true);
+ m_a1 = new TSeqAlign(m_adapters, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, false);
- m_b2 = new TSeqAlign(m_barcodes2, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, o.b_end, true);
- m_a2 = new TSeqAlign(m_adapters2, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, o.end, false);
+ m_b2 = new TSeqAlign(m_barcodes2, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, true);
+ m_a2 = new TSeqAlign(m_adapters2, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.match, o.mismatch, o.gapCost, false);
if(m_log == flexbar::TAB)
*out << "ReadTag\tQueryTag\tQueryStart\tQueryEnd\tOverlapLength\tMismatches\tIndels\tAllowedErrors" << std::endl;
@@ -66,18 +91,18 @@ public:
};
- void alignPairedReadBarcode(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl){
+ void alignPairedReadToBarcodes(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl, const flexbar::AlignmentMode &alMode){
using namespace flexbar;
if(m_barType != BOFF){
switch(m_barType){
- case BARCODE_READ: pRead->barID = m_b1->alignSeqRead(pRead->b, false, alBundle[0], cycle[0], idxAl[0]); break;
- case WITHIN_READ_REMOVAL2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, true, alBundle[2], cycle[2], idxAl[2]);
- case WITHIN_READ_REMOVAL: pRead->barID = m_b1->alignSeqRead(pRead->r1, true, alBundle[1], cycle[1], idxAl[1]); break;
- case WITHIN_READ2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, false, alBundle[2], cycle[2], idxAl[2]);
- case WITHIN_READ: pRead->barID = m_b1->alignSeqRead(pRead->r1, false, alBundle[1], cycle[1], idxAl[1]); break;
+ case BARCODE_READ: pRead->barID = m_b1->alignSeqRead(pRead->b, false, alBundle[0], cycle[0], idxAl[0], alMode, m_bTrimEnd); break;
+ case WITHIN_READ_REMOVAL2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, true, alBundle[2], cycle[2], idxAl[2], alMode, m_bTrimEnd);
+ case WITHIN_READ_REMOVAL: pRead->barID = m_b1->alignSeqRead(pRead->r1, true, alBundle[1], cycle[1], idxAl[1], alMode, m_bTrimEnd); break;
+ case WITHIN_READ2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, false, alBundle[2], cycle[2], idxAl[2], alMode, m_bTrimEnd);
+ case WITHIN_READ: pRead->barID = m_b1->alignSeqRead(pRead->r1, false, alBundle[1], cycle[1], idxAl[1], alMode, m_bTrimEnd); break;
case BOFF: break;
}
@@ -89,18 +114,113 @@ public:
}
- void alignPairedReadAdapter(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl){
+ void alignPairedReadToAdapters(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector<flexbar::ComputeCycle> &cycle, std::vector<unsigned int> &idxAl, const flexbar::AlignmentMode &alMode, const flexbar::TrimEnd trimEnd){
using namespace flexbar;
if(m_adapRem != AOFF){
if(m_adapRem != ATWO)
- m_a1->alignSeqRead(pRead->r1, true, alBundle[0], cycle[0], idxAl[0]);
+ m_a1->alignSeqRead(pRead->r1, true, alBundle[0], cycle[0], idxAl[0], alMode, trimEnd);
if(pRead->r2 != NULL && m_adapRem != AONE){
- if(m_adapRem != NORMAL2) m_a1->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1]);
- else m_a2->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1]);
+ if(m_adapRem != NORMAL2) m_a1->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1], alMode, trimEnd);
+ else m_a2->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1], alMode, trimEnd);
+ }
+ }
+ }
+
+
+ void trimLeftHPS(flexbar::TSeqRead* seqRead){
+
+ using namespace std;
+ using namespace flexbar;
+
+ if(m_htrimAdapterRm && m_useRcTrimEnd){
+ if (seqRead->rmAdapter && (m_aTrimEnd == RIGHT || m_aTrimEnd == RTAIL)) return;
+ else if(seqRead->rmAdapterRC && (m_arcTrimEnd == RIGHT || m_arcTrimEnd == RTAIL)) return;
+ }
+ else if(m_htrimAdapterRm && ! m_useRcTrimEnd){
+ if(m_aTrimEnd == RIGHT || m_aTrimEnd == RTAIL) return;
+ }
+
+ if(! m_htrimAdapterRm || seqRead->rmAdapter || seqRead->rmAdapterRC){
+
+ for(unsigned int s = 0; s < m_htrimLeft.length(); ++s){
+
+ char nuc = m_htrimLeft[s];
+
+ unsigned int cutPos = 0;
+ unsigned int notNuc = 0;
+
+ for(unsigned int i = 0; i < length(seqRead->seq); ++i){
+
+ if(seqRead->seq[i] != nuc){
+ notNuc++;
+ }
+ else if(notNuc <= m_htrimErrorRate * (i+1)){
+
+ if(m_htrimMaxLength != 0 && i+1 > m_htrimMaxLength && (! m_htrimMaxFirstOnly || s == 0)) break;
+
+ cutPos = i+1;
+ }
+ }
+
+ if(cutPos > 0 && cutPos >= m_htrimMinLength){
+ erase(seqRead->seq, 0, cutPos);
+
+ if(m_format == FASTQ){
+ erase(seqRead->qual, 0, cutPos);
+ }
+ }
+ }
+ }
+ }
+
+
+ void trimRightHPS(flexbar::TSeqRead* seqRead){
+
+ using namespace std;
+ using namespace flexbar;
+
+ if(m_htrimAdapterRm && m_useRcTrimEnd){
+ if (seqRead->rmAdapter && (m_aTrimEnd == LEFT || m_aTrimEnd == LTAIL)) return;
+ else if(seqRead->rmAdapterRC && (m_arcTrimEnd == LEFT || m_arcTrimEnd == LTAIL)) return;
+ }
+ else if(m_htrimAdapterRm && ! m_useRcTrimEnd){
+ if(m_aTrimEnd == LEFT || m_aTrimEnd == LTAIL) return;
+ }
+
+ if(! m_htrimAdapterRm || seqRead->rmAdapter || seqRead->rmAdapterRC){
+
+ for(unsigned int s = 0; s < m_htrimRight.length(); ++s){
+
+ char nuc = m_htrimRight[s];
+
+ unsigned int seqLen = length(seqRead->seq);
+ unsigned int cutPos = seqLen;
+ unsigned int notNuc = 0;
+
+ for(int i = seqLen - 1; i >= 0; --i){
+
+ if(seqRead->seq[i] != nuc){
+ notNuc++;
+ }
+ else if(notNuc <= m_htrimErrorRate * (seqLen - i)){
+
+ if(m_htrimMaxLength != 0 && i < seqLen - m_htrimMaxLength && (! m_htrimMaxFirstOnly || s == 0)) break;
+
+ cutPos = i;
+ }
+ }
+
+ if(cutPos < seqLen && cutPos <= seqLen - m_htrimMinLength){
+ erase(seqRead->seq, cutPos, length(seqRead->seq));
+
+ if(m_format == FASTQ){
+ erase(seqRead->qual, cutPos, length(seqRead->qual));
+ }
+ }
}
}
}
@@ -115,6 +235,17 @@ public:
TPairedReadBundle *prBundle = static_cast<TPairedReadBundle* >(item);
+ if(m_umiTags){
+ for(unsigned int i = 0; i < prBundle->size(); ++i){
+ prBundle->at(i)->r1->umi = "";
+
+ if(prBundle->at(i)->r2 != NULL)
+ prBundle->at(i)->r2->umi = "";
+ }
+ }
+
+ AlignmentMode alMode = ALIGNALL;
+
// barcode detection
if(m_barType != BOFF){
@@ -135,7 +266,7 @@ public:
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
- alignPairedReadBarcode(prBundle->at(i), alBundle, cycle, idxAl);
+ alignPairedReadToBarcodes(prBundle->at(i), alBundle, cycle, idxAl, alMode);
}
for(unsigned int i = 0; i < 3; ++i){
@@ -144,7 +275,7 @@ public:
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
- alignPairedReadBarcode(prBundle->at(i), alBundle, cycle, idxAl);
+ alignPairedReadToBarcodes(prBundle->at(i), alBundle, cycle, idxAl, alMode);
}
}
@@ -152,31 +283,86 @@ public:
if(m_adapRem != AOFF){
- TAlignBundle alBundle;
- Alignments r1AlignmentsA, r2AlignmentsA;
-
- alBundle.push_back(r1AlignmentsA);
- alBundle.push_back(r2AlignmentsA);
-
- std::vector<unsigned int> idxAl;
- std::vector<ComputeCycle> cycle;
-
- for(unsigned int i = 0; i < 2; ++i){
- idxAl.push_back(0);
- cycle.push_back(PRELOAD);
+ for(unsigned int c = 0; c < m_arTimes; ++c){
+
+ flexbar::TrimEnd trimEnd = m_aTrimEnd;
+
+ unsigned int rc = 1;
+
+ if(m_useRcTrimEnd){
+ alMode = ALIGNRCOFF;
+ rc = 2;
+ }
+
+ for(unsigned int r = 0; r < rc; ++r){
+
+ if(m_useRcTrimEnd && r == 1){
+ alMode = ALIGNRC;
+ trimEnd = m_arcTrimEnd;
+ }
+
+ TAlignBundle alBundle;
+ Alignments r1AlignmentsA, r2AlignmentsA;
+
+ alBundle.push_back(r1AlignmentsA);
+ alBundle.push_back(r2AlignmentsA);
+
+ std::vector<unsigned int> idxAl;
+ std::vector<ComputeCycle> cycle;
+
+ for(unsigned int i = 0; i < 2; ++i){
+ idxAl.push_back(0);
+ cycle.push_back(PRELOAD);
+ }
+
+ for(unsigned int i = 0; i < prBundle->size(); ++i){
+ alignPairedReadToAdapters(prBundle->at(i), alBundle, cycle, idxAl, alMode, trimEnd);
+ }
+
+ for(unsigned int i = 0; i < 2; ++i){
+ idxAl[i] = 0;
+ cycle[i] = COMPUTE;
+ }
+
+ for(unsigned int i = 0; i < prBundle->size(); ++i){
+ alignPairedReadToAdapters(prBundle->at(i), alBundle, cycle, idxAl, alMode, trimEnd);
+ }
+ }
}
-
+ }
+
+ if(m_umiTags){
for(unsigned int i = 0; i < prBundle->size(); ++i){
- alignPairedReadAdapter(prBundle->at(i), alBundle, cycle, idxAl);
+
+ append(prBundle->at(i)->r1->id, prBundle->at(i)->r1->umi);
+
+ if(prBundle->at(i)->r2 != NULL){
+ append(prBundle->at(i)->r1->id, prBundle->at(i)->r2->umi);
+
+ append(prBundle->at(i)->r2->id, prBundle->at(i)->r1->umi);
+ append(prBundle->at(i)->r2->id, prBundle->at(i)->r2->umi);
+ }
}
-
- for(unsigned int i = 0; i < 2; ++i){
- idxAl[i] = 0;
- cycle[i] = COMPUTE;
+ }
+
+ if(m_htrim){
+ if(m_htrimLeft != ""){
+
+ for(unsigned int i = 0; i < prBundle->size(); ++i){
+ trimLeftHPS(prBundle->at(i)->r1);
+
+ if(prBundle->at(i)->r2 != NULL)
+ trimLeftHPS(prBundle->at(i)->r2);
+ }
}
-
- for(unsigned int i = 0; i < prBundle->size(); ++i){
- alignPairedReadAdapter(prBundle->at(i), alBundle, cycle, idxAl);
+ if(m_htrimRight != ""){
+
+ for(unsigned int i = 0; i < prBundle->size(); ++i){
+ trimRightHPS(prBundle->at(i)->r1);
+
+ if(prBundle->at(i)->r2 != NULL)
+ trimRightHPS(prBundle->at(i)->r2);
+ }
}
}
=====================================
src/PairedInput.h
=====================================
--- a/src/PairedInput.h
+++ b/src/PairedInput.h
@@ -12,7 +12,7 @@ class PairedInput : public tbb::filter {
private:
const flexbar::FileFormat m_format;
- const bool m_isPaired, m_useBarRead, m_useNumberTag;
+ const bool m_isPaired, m_useBarRead, m_useNumberTag, m_interleaved;
const unsigned int m_bundleSize;
tbb::atomic<unsigned long> m_uncalled, m_uncalledPairs, m_tagCounter;
@@ -25,6 +25,7 @@ public:
filter(serial_in_order),
m_format(o.format),
m_useNumberTag(o.useNumberTag),
+ m_interleaved(o.interleavedInput),
m_isPaired(o.isPaired),
m_useBarRead(o.barDetect == flexbar::BARCODE_READ),
m_bundleSize(o.bundleSize),
@@ -37,7 +38,7 @@ public:
m_f2 = NULL;
m_b = NULL;
- if(m_isPaired)
+ if(m_isPaired && ! m_interleaved)
m_f2 = new SeqInput<TSeqStr, TString>(o, o.readsFile2, true, false);
if(m_useBarRead)
@@ -61,9 +62,17 @@ public:
TStrings quals, quals2, qualsBR;
TBools uncalled, uncalled2, uncalledBR;
- unsigned int nReads = m_f1->loadSeqReads(uncalled, ids, seqs, quals, m_bundleSize);
+ unsigned int bundleSize = m_bundleSize;
+ if(m_interleaved) bundleSize = m_bundleSize * 2;
- if(m_isPaired){
+ unsigned int nReads = m_f1->loadSeqReads(uncalled, ids, seqs, quals, bundleSize);
+
+ if(m_interleaved && nReads % 2 == 1){
+ cerr << "\nERROR: Interleaved reads input does not contain even number of reads.\n" << endl;
+ exit(1);
+ }
+
+ if(m_isPaired && ! m_interleaved){
unsigned int nReads2 = m_f2->loadSeqReads(uncalled2, ids2, seqs2, quals2, m_bundleSize);
if(nReads != nReads2){
@@ -75,11 +84,14 @@ public:
if(m_useBarRead){
unsigned int nBarReads = m_b->loadSeqReads(uncalledBR, idsBR, seqsBR, qualsBR, m_bundleSize);
- if(nReads > nBarReads){
+ unsigned int multi = 1;
+ if(m_interleaved) multi = 2;
+
+ if(nReads > nBarReads * multi){
cerr << "\nERROR: Read without barcode read in input.\n" << endl;
exit(1);
}
- else if(nReads < nBarReads){
+ else if(nReads < nBarReads * multi){
cerr << "\nERROR: Barcode read without read in input.\n" << endl;
exit(1);
}
@@ -90,44 +102,96 @@ public:
TPairedReadBundle *prBundle = new TPairedReadBundle();
- for(unsigned int i = 0; i < length(ids); ++i){
+ if(! m_interleaved){
- if(uncalled[i] || (m_isPaired && uncalled2[i])){
-
- if(uncalled[i]) ++m_uncalled;
- if(m_isPaired && uncalled2[i]) ++m_uncalled;
- if(m_isPaired) ++m_uncalledPairs;
- }
- // else if(m_useBarRead && uncalledBR[i]){
- //
- // // to be handled
- // }
- else{
+ for(unsigned int i = 0; i < length(ids); ++i){
- if(m_useNumberTag){
- stringstream converter;
- converter << ++m_tagCounter;
- TString tagCount = converter.str();
+ if(uncalled[i] || (m_isPaired && uncalled2[i])){
+
+ if(uncalled[i]) ++m_uncalled;
+ if(m_isPaired && uncalled2[i]) ++m_uncalled;
+ if(m_isPaired) ++m_uncalledPairs;
+ }
+ // else if(m_useBarRead && uncalledBR[i]){
+ //
+ // // to be handled
+ // }
+ else{
+
+ if(m_useNumberTag){
+ stringstream converter;
+ converter << ++m_tagCounter;
+ TString tagCount = converter.str();
+
+ ids[i] = tagCount;
+ if(m_isPaired) ids2[i] = tagCount;
+ if(m_useBarRead) idsBR[i] = tagCount;
+ }
+
+ TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL;
+
+ if(m_format == FASTA){
+ read1 = new TSeqRead(seqs[i], ids[i]);
+ if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i]);
+ if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]);
+ }
+ else{
+ read1 = new TSeqRead(seqs[i], ids[i], quals[i]);
+ if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i], quals2[i]);
+ if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]);
+ }
- ids[i] = tagCount;
- if(m_isPaired) ids2[i] = tagCount;
- if(m_useBarRead) idsBR[i] = tagCount;
+ prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
+ }
+ }
+ else{ // interleaved paired input
+
+ unsigned int nEntries = (unsigned int) (length(ids) / 2);
+
+ for(unsigned int i = 0; i < nEntries; ++i){
- TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL;
+ unsigned int r = (i * 2);
+ unsigned int p = (i * 2) + 1;
- if(m_format == FASTA){
- read1 = new TSeqRead(seqs[i], ids[i]);
- if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i]);
- if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]);
+ if(uncalled[r] || uncalled[p]){
+
+ if(uncalled[r]) ++m_uncalled;
+ if(uncalled[p]) ++m_uncalled;
+
+ ++m_uncalledPairs;
}
+ // else if(m_useBarRead && uncalledBR[i]){
+ //
+ // // to be handled
+ // }
else{
- read1 = new TSeqRead(seqs[i], ids[i], quals[i]);
- if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i], quals2[i]);
- if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]);
+
+ if(m_useNumberTag){
+ stringstream converter;
+ converter << ++m_tagCounter;
+ TString tagCount = converter.str();
+
+ ids[r] = tagCount;
+ ids[p] = tagCount;
+ if(m_useBarRead) idsBR[i] = tagCount;
+ }
+
+ TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL;
+
+ if(m_format == FASTA){
+ read1 = new TSeqRead(seqs[r], ids[r]);
+ read2 = new TSeqRead(seqs[p], ids[p]);
+ if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]);
+ }
+ else{
+ read1 = new TSeqRead(seqs[r], ids[r], quals[r]);
+ read2 = new TSeqRead(seqs[p], ids[p], quals[p]);
+ if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]);
+ }
+
+ prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
-
- prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
}
@@ -176,20 +240,20 @@ public:
unsigned long getNrProcessedReads() const{
- if(m_isPaired) return m_f1->getNrProcessedReads() + m_f2->getNrProcessedReads();
- else return m_f1->getNrProcessedReads();
+ if(m_isPaired && ! m_interleaved) return m_f1->getNrProcessedReads() + m_f2->getNrProcessedReads();
+ else return m_f1->getNrProcessedReads();
}
unsigned long getNrProcessedChars() const{
- if(m_isPaired) return m_f1->getNrProcessedChars() + m_f2->getNrProcessedChars();
- else return m_f1->getNrProcessedChars();
+ if(m_isPaired && ! m_interleaved) return m_f1->getNrProcessedChars() + m_f2->getNrProcessedChars();
+ else return m_f1->getNrProcessedChars();
}
unsigned long getNrLowPhredReads() const {
- if(m_isPaired) return m_f1->getNrLowPhredReads() + m_f2->getNrLowPhredReads();
- else return m_f1->getNrLowPhredReads();
+ if(m_isPaired && ! m_interleaved) return m_f1->getNrLowPhredReads() + m_f2->getNrLowPhredReads();
+ else return m_f1->getNrLowPhredReads();
}
};
=====================================
src/PairedOutput.h
=====================================
--- a/src/PairedOutput.h
+++ b/src/PairedOutput.h
@@ -16,15 +16,11 @@ private:
int m_mapsize;
const int m_minLength, m_qtrimThresh, m_qtrimWinSize;
const bool m_isPaired, m_writeUnassigned, m_writeSingleReads, m_writeSingleReadsP;
- const bool m_twoBarcodes, m_qtrimPostRm, m_randTag;
+ const bool m_twoBarcodes, m_qtrimPostRm;
tbb::atomic<unsigned long> m_nSingleReads, m_nLowPhred;
const std::string m_target;
- const std::string m_trimLeftNucs, m_trimRightNucs;
-
- const int m_hpsMinLength;
- const float m_errorRate;
const flexbar::FileFormat m_format;
const flexbar::RunType m_runType;
@@ -50,16 +46,11 @@ public:
m_runType(o.runType),
m_barDetect(o.barDetect),
m_minLength(o.min_readLen),
- m_trimLeftNucs(o.trimLeftNucs),
- m_trimRightNucs(o.trimRightNucs),
- m_hpsMinLength(o.hpsMinLength),
- m_errorRate(o.a_errorRate),
m_qtrim(o.qTrim),
m_qtrimThresh(o.qtrimThresh),
m_qtrimWinSize(o.qtrimWinSize),
m_qtrimPostRm(o.qtrimPostRm),
m_isPaired(o.isPaired),
- m_randTag(o.randTag),
m_writeUnassigned(o.writeUnassigned),
m_writeSingleReads(o.writeSingleReads),
m_writeSingleReadsP(o.writeSingleReadsP),
@@ -235,72 +226,6 @@ public:
};
- void trimLeftNucs(flexbar::TSeqRead* seqRead){
-
- using namespace std;
- using namespace flexbar;
-
- for(unsigned int s = 0; s < m_trimLeftNucs.length(); ++s){
-
- char nuc = m_trimLeftNucs[s];
-
- unsigned int cutPos = 0;
- unsigned int notNuc = 0;
-
- for(unsigned int i = 0; i < length(seqRead->seq); ++i){
-
- if(seqRead->seq[i] != nuc){
- notNuc++;
- }
- else if(notNuc <= m_errorRate * (i + 1)){
- cutPos = i+1;
- }
- }
-
- if(cutPos > 0 && cutPos >= m_hpsMinLength){
- erase(seqRead->seq, 0, cutPos);
-
- if(m_format == FASTQ){
- erase(seqRead->qual, 0, cutPos);
- }
- }
- }
- }
-
-
- void trimRightNucs(flexbar::TSeqRead* seqRead){
-
- using namespace std;
- using namespace flexbar;
-
- for(unsigned int s = 0; s < m_trimRightNucs.length(); ++s){
-
- char nuc = m_trimRightNucs[s];
-
- unsigned int cutPos = length(seqRead->seq);
- unsigned int notNuc = 0;
-
- for(int i = length(seqRead->seq) - 1; i >= 0; --i){
-
- if(seqRead->seq[i] != nuc){
- notNuc++;
- }
- else if(notNuc <= m_errorRate * (length(seqRead->seq) - i)){
- cutPos = i;
- }
- }
-
- if(cutPos < length(seqRead->seq) && cutPos <= length(seqRead->seq) - m_hpsMinLength){
- erase(seqRead->seq, cutPos, length(seqRead->seq));
-
- if(m_format == FASTQ){
- erase(seqRead->qual, cutPos, length(seqRead->qual));
- }
- }
- }
- }
-
-
void writePairedRead(flexbar::TPairedRead* pRead){
using namespace flexbar;
@@ -319,11 +244,6 @@ public:
if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
- if(m_trimLeftNucs != "") trimLeftNucs(pRead->r1);
- if(m_trimRightNucs != "") trimRightNucs(pRead->r1);
-
- if(m_randTag) append(pRead->r1->id, pRead->r1->umi);
-
if(length(pRead->r1->seq) >= m_minLength){
m_outMap[pRead->barID].f1->writeRead(pRead->r1);
@@ -355,23 +275,6 @@ public:
if(qualTrim(pRead->r2, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
- if(m_trimLeftNucs != ""){
- trimLeftNucs(pRead->r1);
- trimLeftNucs(pRead->r2);
- }
- if(m_trimRightNucs != ""){
- trimRightNucs(pRead->r1);
- trimRightNucs(pRead->r2);
- }
-
- if(m_randTag){
- append(pRead->r1->id, pRead->r1->umi);
- append(pRead->r1->id, pRead->r2->umi);
-
- append(pRead->r2->id, pRead->r1->umi);
- append(pRead->r2->id, pRead->r2->umi);
- }
-
if(length(pRead->r1->seq) >= m_minLength) l1ok = true;
if(length(pRead->r2->seq) >= m_minLength) l2ok = true;
=====================================
src/SeqAlign.h
=====================================
--- a/src/SeqAlign.h
+++ b/src/SeqAlign.h
@@ -11,11 +11,10 @@ private:
typedef AlignResults<TSeqStr> TAlignResults;
- const flexbar::TrimEnd m_trimEnd;
const flexbar::LogAlign m_log;
const flexbar::FileFormat m_format;
- const bool m_isBarcoding, m_writeTag, m_randTag, m_strictRegion;
+ const bool m_isBarcoding, m_writeTag, m_umiTags, m_strictRegion;
const int m_minLength, m_minOverlap, m_tailLength;
const float m_errorRate;
const unsigned int m_bundleSize;
@@ -29,14 +28,13 @@ private:
public:
- SeqAlign(tbb::concurrent_vector<flexbar::TBar> *queries, const Options &o, int minOverlap, float errorRate, const int tailLength, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd end, const bool isBarcoding):
+ SeqAlign(tbb::concurrent_vector<flexbar::TBar> *queries, const Options &o, int minOverlap, float errorRate, const int tailLength, const int match, const int mismatch, const int gapCost, const bool isBarcoding):
m_minOverlap(minOverlap),
m_errorRate(errorRate),
m_tailLength(tailLength),
- m_trimEnd(end),
m_isBarcoding(isBarcoding),
- m_randTag(o.randTag),
+ m_umiTags(o.umiTags),
m_minLength(o.min_readLen),
m_log(o.logAlign),
m_format(o.format),
@@ -46,14 +44,14 @@ public:
m_out(o.out),
m_nPreShortReads(0),
m_modified(0),
- algo(TAlgorithm(o, match, mismatch, gapCost, end)){
+ algo(TAlgorithm(o, match, mismatch, gapCost)){
m_queries = queries;
m_rmOverlaps = tbb::concurrent_vector<unsigned long>(flexbar::MAX_READLENGTH + 1, 0);
};
- int alignSeqRead(flexbar::TSeqRead* sr, const bool performRemoval, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl){
+ int alignSeqRead(flexbar::TSeqRead* sr, const bool performRemoval, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl, const flexbar::AlignmentMode &alMode, const flexbar::TrimEnd trimEnd){
using namespace std;
using namespace flexbar;
@@ -78,16 +76,19 @@ public:
for(unsigned int i = 0; i < m_queries->size(); ++i){
+ if (alMode == ALIGNRCOFF && m_queries->at(i).rcAdapter) continue;
+ else if(alMode == ALIGNRC && ! m_queries->at(i).rcAdapter) continue;
+
TSeqStr &qseq = m_queries->at(i).seq;
TSeqStr *rseq = &seqRead.seq;
TSeqStr tmp;
- if(m_trimEnd == LTAIL || m_trimEnd == RTAIL){
+ if(trimEnd == LTAIL || trimEnd == RTAIL){
int tailLength = (m_tailLength > 0) ? m_tailLength : length(qseq);
if(tailLength < readLength){
- if(m_trimEnd == LTAIL) tmp = prefix(seqRead.seq, tailLength);
- else tmp = suffix(seqRead.seq, readLength - tailLength);
+ if(trimEnd == LTAIL) tmp = prefix(seqRead.seq, tailLength);
+ else tmp = suffix(seqRead.seq, readLength - tailLength);
rseq = &tmp;
}
}
@@ -112,10 +113,13 @@ public:
// align each query sequence and store best one
for(unsigned int i = 0; i < m_queries->size(); ++i){
+ if (alMode == ALIGNRCOFF && m_queries->at(i).rcAdapter) continue;
+ else if(alMode == ALIGNRC && ! m_queries->at(i).rcAdapter) continue;
+
TAlignResults a;
// global sequence alignment
- algo.alignGlobal(a, alignments, cycle, idxAl++);
+ algo.alignGlobal(a, alignments, cycle, idxAl++, trimEnd);
a.queryLength = length(m_queries->at(i).seq);
a.tailLength = (m_tailLength > 0) ? m_tailLength : a.queryLength;
@@ -128,8 +132,8 @@ public:
bool validAl = true;
- if(((m_trimEnd == RTAIL || m_trimEnd == RIGHT) && a.startPosA < a.startPosS && m_strictRegion) ||
- ((m_trimEnd == LTAIL || m_trimEnd == LEFT) && a.endPosA > a.endPosS && m_strictRegion) ||
+ if(((trimEnd == RTAIL || trimEnd == RIGHT) && a.startPosA < a.startPosS && m_strictRegion) ||
+ ((trimEnd == LTAIL || trimEnd == LEFT) && a.endPosA > a.endPosS && m_strictRegion) ||
a.overlapLength < 1){
validAl = false;
@@ -149,24 +153,24 @@ public:
// valid alignment
if(qIndex >= 0){
- TrimEnd trimEnd = m_trimEnd;
+ TrimEnd trEnd = trimEnd;
// trim read based on alignment
if(performRemoval){
- if(trimEnd == ANY){
+ if(trEnd == ANY){
if(am.startPosA <= am.startPosS && am.endPosS <= am.endPosA){
seqRead.seq = "";
if(m_format == FASTQ) seqRead.qual = "";
}
else if(am.startPosA - am.startPosS >= am.endPosS - am.endPosA){
- trimEnd = RIGHT;
+ trEnd = RIGHT;
}
- else trimEnd = LEFT;
+ else trEnd = LEFT;
}
- switch(trimEnd){
+ switch(trEnd){
int rCutPos;
@@ -211,6 +215,11 @@ public:
++m_modified;
+ if(! m_isBarcoding){
+ if(! m_queries->at(qIndex).rcAdapter) seqRead.rmAdapter = true;
+ else seqRead.rmAdapterRC = true;
+ }
+
// count number of removals for each query
m_queries->at(qIndex).rmOverlap++;
@@ -233,9 +242,9 @@ public:
// valid alignment, not neccesarily removal
- if(m_randTag && am.randTag != ""){
+ if(m_umiTags && am.umiTag != ""){
append(seqRead.umi, "_");
- append(seqRead.umi, am.randTag);
+ append(seqRead.umi, am.umiTag);
}
// alignment stats
@@ -244,9 +253,9 @@ public:
if(performRemoval){
s << "Sequence removal:";
- if(trimEnd == LEFT || trimEnd == LTAIL) s << " left side\n";
- else if(trimEnd == RIGHT || trimEnd == RTAIL) s << " right side\n";
- else s << " any side\n";
+ if(trEnd == LEFT || trEnd == LTAIL) s << " left side\n";
+ else if(trEnd == RIGHT || trEnd == RTAIL) s << " right side\n";
+ else s << " any side\n";
}
else s << "Sequence detection, no removal:\n";
=====================================
src/SeqAlignAlgo.h
=====================================
--- a/src/SeqAlignAlgo.h
+++ b/src/SeqAlignAlgo.h
@@ -21,16 +21,14 @@ private:
// TScoreSimple m_score;
TScoreMatrix m_scoreMatrix;
- const bool m_randTag;
+ const bool m_umiTags;
const flexbar::LogAlign m_log;
- const flexbar::TrimEnd m_trimEnd;
public:
- SeqAlignAlgo(const Options &o, const int match, const int mismatch, const int gapCost, const flexbar::TrimEnd trimEnd):
- m_randTag(o.randTag),
- m_log(o.logAlign),
- m_trimEnd(trimEnd){
+ SeqAlignAlgo(const Options &o, const int match, const int mismatch, const int gapCost):
+ m_umiTags(o.umiTags),
+ m_log(o.logAlign){
using namespace seqan;
@@ -50,7 +48,7 @@ public:
};
- void alignGlobal(TAlignResults &a, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, const unsigned int idxAl){
+ void alignGlobal(TAlignResults &a, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, const unsigned int idxAl, const flexbar::TrimEnd trimEnd){
using namespace std;
using namespace seqan;
@@ -69,12 +67,12 @@ public:
cycle = RESULTS;
- if(m_trimEnd == RIGHT || m_trimEnd == RTAIL){
+ if(trimEnd == RIGHT || trimEnd == RTAIL){
AlignConfig<true, false, true, true> ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
}
- else if(m_trimEnd == LEFT || m_trimEnd == LTAIL){
+ else if(trimEnd == LEFT || trimEnd == LTAIL){
AlignConfig<true, true, false, true> ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
@@ -113,7 +111,7 @@ public:
a.alString = s.str();
}
- if(m_randTag) a.randTag = "";
+ if(m_umiTags) a.umiTag = "";
TRowIterator it1 = begin(row1);
@@ -130,7 +128,7 @@ public:
if(isGap(it1)) ++a.gapsR;
else if(isGap(it2)) ++a.gapsA;
else if(*it1 != *it2 && *it2 != 'N') ++a.mismatches;
- else if(m_randTag && *it2 == 'N') append(a.randTag, (TChar) *it1);
+ else if(m_umiTags && *it2 == 'N') append(a.umiTag, (TChar) *it1);
}
++alPos;
++it2;
=====================================
src/SeqInput.h
=====================================
--- a/src/SeqInput.h
+++ b/src/SeqInput.h
@@ -117,6 +117,7 @@ public:
if(m_format == FASTQ)
erase(quals[i], 0, idx);
}
+
if(m_preTrimEnd > 0 && length(seq) > 1){
int idx = m_preTrimEnd;
@@ -127,6 +128,7 @@ public:
if(m_format == FASTQ)
quals[i] = prefix(quals[i], length(quals[i]) - idx);
}
+
if(m_qtrim != QOFF && ! m_qtrimPostRm){
if(qualTrim(seq, quals[i], m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
View it on GitLab: https://salsa.debian.org/med-team/flexbar/commit/9856de0a96691831d1b7d2a34d0de5aa22a78425
---
View it on GitLab: https://salsa.debian.org/med-team/flexbar/commit/9856de0a96691831d1b7d2a34d0de5aa22a78425
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180522/6a6fab12/attachment-0001.html>
More information about the debian-med-commit
mailing list