[med-svn] [blasr] 07/13: Imported Upstream version 5.3
Afif Elghraoui
afif at moszumanska.debian.org
Mon Oct 24 01:24:35 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository blasr.
commit 317d7ebbdd81af097a2baf0531b723b816bc7717
Author: Afif Elghraoui <afif at debian.org>
Date: Sun Oct 23 15:45:00 2016 -0700
Imported Upstream version 5.3
---
Blasr.cpp | 36 ++++++++++++---
cram.mk | 11 +++--
ctest/affineAlign.t | 6 +--
ctest/aggressiveIntervalCut.t | 4 +-
ctest/alignScore.t | 2 +-
ctest/bamConcordant.t | 6 +--
ctest/bamIn.t | 39 ++--------------
ctest/bamOut.t | 4 +-
ctest/bug25328.t | 2 +-
ctest/bug25741.t | 9 ----
ctest/bug25766.t | 4 +-
ctest/ccsH5.t | 4 +-
ctest/cigarAdjecentIndels.t | 8 ++--
ctest/concordant.t | 14 +++---
ctest/dataset.t | 16 +++----
ctest/deterministic.t | 6 +--
ctest/ecoli.t | 10 ++--
ctest/fastMaxInterval.t | 4 +-
ctest/filtercriteria.t | 6 +--
ctest/fofn.t | 4 +-
ctest/hitpolicy.t | 18 ++++----
ctest/holeNumbers.t | 4 +-
ctest/m0-5.t | 10 ++--
ctest/multipart.t | 2 +-
ctest/noSplitSubreads.t | 6 +--
ctest/samNM.t | 13 ------
ctest/setup.sh | 3 +-
ctest/unaligned.t | 4 +-
ctest/useccsallBestN1.t | 5 +-
ctest/useccsallLargeGenome.t | 4 +-
ctest/verbose.t | 2 +-
iblasr/BlasrHeaders.h | 1 +
iblasr/BlasrUtils.hpp | 7 +--
iblasr/BlasrUtilsImpl.hpp | 11 +++--
iblasr/MappingParameters.h | 49 ++++++++++++++++++--
iblasr/RegisterBlasrOptions.h | 10 ++--
makefile | 2 +-
rules.mk | 2 +-
utils/SamFilter.cpp | 2 +-
utils/bam2bax/makefile | 3 +-
utils/bam2bax/src/Bam2BaxConverterImpl.hpp | 14 ------
utils/bam2bax/src/Bam2BaxMain.cpp | 4 +-
utils/bam2bax/src/Bam2PlxMain.cpp | 4 +-
utils/bam2bax/src/Converter.cpp | 16 -------
utils/bam2bax/src/Settings.cpp | 12 +----
utils/bam2bax/tests/cram/bam2bax.t | 22 ++++-----
utils/bam2bax/tests/cram/bam2plx.t | 12 ++---
utils/bax2bam/CMakeLists.txt | 2 +
utils/bax2bam/README.md | 4 +-
utils/bax2bam/makefile | 5 +-
utils/bax2bam/src/Bax2Bam.cpp | 72 ++++++++++++++++++++++++++---
utils/bax2bam/src/CMakeLists.txt | 2 +
utils/bax2bam/src/ConverterBase.h | 16 ++++---
utils/bax2bam/src/Settings.cpp | 12 ++++-
utils/bax2bam/src/Settings.h | 4 ++
utils/bax2bam/src/main.cpp | 15 +++++-
utils/bax2bam/tests/CMakeLists.txt | 1 +
utils/bax2bam/tests/bax2bam.t | 2 +-
utils/bax2bam/tests/src/TestData.h.in | 2 +-
utils/bax2bam/tests/src/test_ccs.cpp | 2 +-
utils/bax2bam/tests/src/test_hqregions.cpp | 13 +++---
utils/bax2bam/tests/src/test_polymerase.cpp | 18 ++++----
utils/bax2bam/tests/src/test_subreads.cpp | 12 +++--
utils/ctest/samFilter.t | 24 +++++-----
64 files changed, 366 insertions(+), 277 deletions(-)
diff --git a/Blasr.cpp b/Blasr.cpp
index 85a928a..bee08c8 100644
--- a/Blasr.cpp
+++ b/Blasr.cpp
@@ -16,24 +16,33 @@ using namespace std;
MappingSemaphores semaphores;
ostream *outFilePtr = NULL;
#ifdef USE_PBBAM
-PacBio::BAM::BamWriter * bamWriterPtr = NULL;
+PacBio::BAM::IRecordWriter * bamWriterPtr = NULL; // use IRecordWriter for both SAM ands BAM
#endif
HDFRegionTableReader *regionTableReader = NULL;
ReaderAgglomerate *reader = NULL;
+// Add comment to version history for each version change !
+//
+// Version history
+//
+// 5.0 - a new major version number
+// 5.1 - transiotion to POSIX notation - double sashes before multi-character flags
+// 5.2 - --sam no longer supported
+// 5.3 - --sam supported via pbbam/IRecordWriter
+//
const string GetMajorVersion() {
- return "5.2";
+ return "5.3";
}
// version format is 3 numbers sparated by dots : Version.Subversion.SHA1
const string GetVersion(void) {
string gitVersionString(SHA1_7); // gitVersionString is first 7 characters of SHA1
string version = GetMajorVersion();
- if (gitVersionString.size() == 7) {
+ // if (gitVersionString.size() == 7) {
version.append(".");
version.append(gitVersionString);
- }
+ // }
return version;
}
@@ -1286,12 +1295,23 @@ int main(int argc, char* argv[]) {
commandLineString);
string headerString = shp.ToString();// SAM/BAM header
if (params.printSAM) {
+ // this is not going to be executed since sam is printed via bam
*outFilePtr << headerString;
} else if (params.printBAM) {
+ // here both bam and sam are handled
#ifdef USE_PBBAM
PacBio::BAM::BamHeader header = PacBio::BAM::BamHeader(headerString);
- // Both file name and SAMHeader are required in order to create a BamWriter.
- bamWriterPtr = new PacBio::BAM::BamWriter(params.outFileName, header);
+ // Create bam header
+ // Both file name and SAMHeader are required in order to create a BamWriter.
+ // sam_via_bam changes
+ if (params.sam_via_bam)
+ {
+ bamWriterPtr = new PacBio::BAM::SamWriter(params.outFileName, header);
+ }
+ else
+ {
+ bamWriterPtr = new PacBio::BAM::BamWriter(params.outFileName, header);
+ }
#else
REQUIRE_PBBAM_ERROR();
#endif
@@ -1508,7 +1528,9 @@ int main(int argc, char* argv[]) {
#ifdef USE_PBBAM
assert(bamWriterPtr);
try {
- bamWriterPtr->TryFlush();
+ if (!params.sam_via_bam) { // no need to flush for SAM , but need to understand why
+ bamWriterPtr->TryFlush();
+ }
delete bamWriterPtr;
bamWriterPtr = NULL;
} catch (std::exception e) {
diff --git a/cram.mk b/cram.mk
index e928689..35f62e8 100644
--- a/cram.mk
+++ b/cram.mk
@@ -1,14 +1,19 @@
FAST_CTESTS := \
-ctest/affineAlign.t ctest/bamOut.t ctest/ccsH5.t ctest/filtercriteria.t ctest/m0-5.t ctest/samNM.t \
+ctest/affineAlign.t ctest/bamOut.t ctest/ccsH5.t ctest/filtercriteria.t ctest/m0-5.t \
ctest/aggressiveIntervalCut.t ctest/fofn.t ctest/multipart.t \
-ctest/alignScore.t ctest/bug25741.t ctest/ecoli.t ctest/hitpolicy.t ctest/noSplitSubreads.t \
+ctest/alignScore.t ctest/hitpolicy.t ctest/noSplitSubreads.t \
ctest/bamIn.t ctest/fastMaxInterval.t ctest/open_fail.t ctest/verbose.t ctest/deterministic.t
+
MILD_CTESTS := \
- ctest/useccsallBestN1.t ctest/concordant.t ctest/bug25766.t ctest/holeNumbers.t
+ ctest/bug25766.t ctest/holeNumbers.t
SLOW_CTESTS := ctest/bug25328.t ctest/useccsallLargeGenome.t
+# XXX: following tests sidelined, needs bam input after --sam option removed
+# FAST: ctest/ecoli.t
+# MILD: ctest/useccsallBestN1.t ctest/concordant.t
+
#BLASR_PATH=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/private/otherbins/internalall/bin/
#export BLASR_PATH
diff --git a/ctest/affineAlign.t b/ctest/affineAlign.t
index 71e9bbe..df03825 100644
--- a/ctest/affineAlign.t
+++ b/ctest/affineAlign.t
@@ -3,15 +3,15 @@ Set up
Test affineAlign
$ rm -rf $OUTDIR/affineAlign.m0
- $ $EXEC $DATDIR/affineAlign.fofn $DATDIR/substr_with_ins.fasta -m 0 -out $OUTDIR/affineAlign.m0 -affineAlign -holeNumbers 493 -insertion 100 -deletion 100
+ $ $EXEC $DATDIR/affineAlign.fofn $DATDIR/substr_with_ins.fasta -m 0 --out $OUTDIR/affineAlign.m0 --affineAlign --holeNumbers 493 --insertion 100 --deletion 100
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/affineAlign.m0 $STDDIR/affineAlign_2014_06_10.m0
$ rm -rf $OUTDIR/ecoli_affine.m0
- $ $EXEC $DATDIR/ecoli_affine.fasta $DATDIR/ecoli_reference.fasta -m 0 -out $OUTDIR/ecoli_affine.m0 -affineAlign -insertion 100 -deletion 100
+ $ $EXEC $DATDIR/ecoli_affine.fasta $DATDIR/ecoli_reference.fasta -m 0 --out $OUTDIR/ecoli_affine.m0 --affineAlign --insertion 100 --deletion 100
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/ecoli_affine.m0 $STDDIR/ecoli_affine_2014_06_10.m0
-# Note that MapQV for -affineAlign has been fixed in 2014 04 18, bug 24363
+# Note that MapQV for --affineAlign has been fixed in 2014 04 18, bug 24363
diff --git a/ctest/aggressiveIntervalCut.t b/ctest/aggressiveIntervalCut.t
index 571e8fe..8743782 100644
--- a/ctest/aggressiveIntervalCut.t
+++ b/ctest/aggressiveIntervalCut.t
@@ -1,11 +1,11 @@
Set up
$ . $TESTDIR/setup.sh
-Test -aggressiveIntervalCut.
+Test --aggressiveIntervalCut.
$ rm -f $TMP1
$ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
$ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
- $ $EXEC $BASFILE $REFFA -holeNumbers 1-100 -out $TMP1 -aggressiveIntervalCut
+ $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --aggressiveIntervalCut
[INFO] * [blasr] started. (glob)
[INFO] * [blasr] ended. (glob)
$ echo $?
diff --git a/ctest/alignScore.t b/ctest/alignScore.t
index 492f92e..d3eea9e 100644
--- a/ctest/alignScore.t
+++ b/ctest/alignScore.t
@@ -3,7 +3,7 @@ Set up
Test alignment score
$ rm -rf $OUTDIR/testscore.m0
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -minReadLength 1 -m 0 -out $OUTDIR/testscore.m0
+ $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta --minReadLength 1 -m 0 --out $OUTDIR/testscore.m0
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/testscore.m0 $STDDIR/testscore.m0
diff --git a/ctest/bamConcordant.t b/ctest/bamConcordant.t
index 93d5691..fd910d6 100644
--- a/ctest/bamConcordant.t
+++ b/ctest/bamConcordant.t
@@ -1,8 +1,8 @@
Set up
$ . $TESTDIR/setup.sh
-Test using bam as input, use -concordant
- $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/bamConcordantRef.fasta -bam -concordant -refineConcordantAlignments -bestn 1 -out $OUTDIR/bamConcordant.bam
+Test using bam as input, use --concordant
+ $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/bamConcordantRef.fasta --bam --concordant --refineConcordantAlignments --bestn 1 --out $OUTDIR/bamConcordant.bam
[INFO]* (glob)
[INFO]* (glob)
@@ -25,7 +25,7 @@ Check whether sam out and bam out have identical alignments, not checking qvs
86?? (glob)
86?? (glob)
- $ $EXEC /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam $DATDIR/lambda_ref.fasta -m 4 -concordant -bestn 1 -holeNumbers 17417 -out $OUTDIR/tmp.m4 -V 2 > $OUTDIR/bamConcordant.log
+ $ $EXEC /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam $DATDIR/lambda_ref.fasta -m 4 --concordant --bestn 1 --holeNumbers 17417 --out $OUTDIR/tmp.m4 -V 2 > $OUTDIR/bamConcordant.log
[INFO]* (glob)
[INFO]* (glob)
diff --git a/ctest/bamIn.t b/ctest/bamIn.t
index d518c35..970759d 100644
--- a/ctest/bamIn.t
+++ b/ctest/bamIn.t
@@ -2,51 +2,20 @@ Set up
$ . $TESTDIR/setup.sh
Test using bam as input
- $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/tiny_bam_in.m4
+ $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/tiny_bam_in.m4
[INFO]* (glob)
[INFO]* (glob)
Check whether blasr produces identical results taking fasta sequences of the bam as input
- $ $EXEC $DATDIR/test_bam/tiny_fasta.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/tiny_fasta_in.m4
+ $ $EXEC $DATDIR/test_bam/tiny_fasta.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/tiny_fasta_in.m4
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/tiny_bam_in.m4 $OUTDIR/tiny_fasta_in.m4
-Test bam in, sam out
- $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -sam -out $OUTDIR/tiny_bam_in.sam -printSAMQV -clipping subread -cigarUseSeqMatch
- [INFO]* (glob)
- [INFO]* (glob)
-
Test bam in, bam out
- $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -bam -out $OUTDIR/tiny_bam_in.bam -clipping subread
- [INFO]* (glob)
- [INFO]* (glob)
-
-Check whether sam out and bam out have identical alignments, not checking qvs
- $ $SAMTOOLS view -h $OUTDIR/tiny_bam_in.bam -o $OUTDIR/tiny_bam_in.bam.sam
- $ cut -f 2-11 $OUTDIR/tiny_bam_in.bam.sam |sed -n '6,$p' > $TMP1.aln
- $ cut -f 2-11 $OUTDIR/tiny_bam_in.sam |sed -n '6,$p' > $TMP2.aln
- $ diff $TMP1.aln $TMP2.aln
-
-Check whether sam out and bam out have identical read groups @RG
- $ awk '/^@RG/' $OUTDIR/tiny_bam_in.bam.sam > $TMP1.rg
- $ awk '/^@RG/' $OUTDIR/tiny_bam_in.sam > $TMP2.rg
- $ diff $TMP1.rg $TMP2.rg
-
-Compare iq produced with stdout
- $ sed -n '6,$p' $OUTDIR/tiny_bam_in.bam.sam | awk '{gsub(/\t/,"\n");}1' | awk '/^iq:Z:/' > $TMP1.iq
- $ sed -n '6,$p' $STDDIR/$UPDATEDATE/tiny_bam_in.bam.sam | awk '{gsub(/\t/,"\n");}1' | awk '/^iq:Z:/' > $TMP2.iq
- $ diff $TMP1.iq $TMP2.iq
-
-TODO:Check whether sam out and bam out have identical insertion qvs
-Currently QVs in bam are in 'native' orientation, and QVs in sam are in 'genomic' orientation. This needs to be fixed.
-$ sed -n '6,$p' $OUTDIR/tiny_bam_in.sam | awk '{gsub(/\t/,"\n");}1' | awk '/^iq:Z:/' > $TMP2.iq
-
-Test with multiple nproc
- $ $EXEC $DATDIR/test_bam/two_bam.fofn $DATDIR/lambda_ref.fasta -bam -nproc 15 -out $OUTDIR/two_bam_in.bam
+ $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta --bam --out $OUTDIR/tiny_bam_in.bam --clipping subread
[INFO]* (glob)
[INFO]* (glob)
- $ $SAMTOOLS view -h $OUTDIR/two_bam_in.bam -o $OUTDIR/two_bam_in.bam.sam
-TODO: test -concordant, when pbbam API to query over ZMWs is available.
+TODO: test --concordant, when pbbam API to query over ZMWs is available.
TODO: test bam with ccs reads
diff --git a/ctest/bamOut.t b/ctest/bamOut.t
index a15b811..0e8c6fa 100644
--- a/ctest/bamOut.t
+++ b/ctest/bamOut.t
@@ -4,11 +4,11 @@ Set up
Test generating bam output
Input is bam, clipping=soft and subread should produce identical results
- $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -bam -out $OUTDIR/tiny_bam_in_soft.bam -clipping soft
+ $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta --bam --out $OUTDIR/tiny_bam_in_soft.bam --clipping soft
[INFO]* (glob)
[INFO]* (glob)
- $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -bam -out $OUTDIR/tiny_bam_in_subread.bam -clipping subread
+ $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta --bam --out $OUTDIR/tiny_bam_in_subread.bam --clipping subread
[INFO]* (glob)
[INFO]* (glob)
diff --git a/ctest/bug25328.t b/ctest/bug25328.t
index 2127433..12bb118 100644
--- a/ctest/bug25328.t
+++ b/ctest/bug25328.t
@@ -5,7 +5,7 @@ bug_25328, unrolled resequencing test
$ INFA=$DATDIR/bug_25328_zmw_38131.fasta
$ REF=$DATDIR/All4mers_circular_72x_l50256.fasta
$ OUTFA=$OUTDIR/bug_25328.m4
- $ $EXEC $INFA $REF -bestn 1 -nCandidates 1 -forwardOnly -maxMatch 14 -m 4 -out $OUTFA
+ $ $EXEC $INFA $REF --bestn 1 --nCandidates 1 --forwardOnly --maxMatch 14 -m 4 --out $OUTFA
[INFO]* (glob)
[INFO]* (glob)
diff --git a/ctest/bug25741.t b/ctest/bug25741.t
deleted file mode 100644
index d29829a..0000000
--- a/ctest/bug25741.t
+++ /dev/null
@@ -1,9 +0,0 @@
-Set up
- $ . $TESTDIR/setup.sh
-
-bug_25741, if input bas.h5 does not contain mergeQV, blasr with -printSAMQV, -nproc>1 should not write garbage 'mq' values to output.
- $ $EXEC $DATDIR/bas_wo_mergeQV.fofn $DATDIR/lambda_ref.fasta -printSAMQV -sam -clipping subread -out $OUTDIR/out_printSAMQV.sam -nproc 12
- [INFO]* (glob)
- [INFO]* (glob)
- $ grep 'mq' $OUTDIR/out_printSAMQV.sam |wc -l
- 1
diff --git a/ctest/bug25766.t b/ctest/bug25766.t
index eaf9e06..3d05b2a 100644
--- a/ctest/bug25766.t
+++ b/ctest/bug25766.t
@@ -1,10 +1,10 @@
Set up
$ . $TESTDIR/setup.sh
-bug_25766, added an option -minRawSubreadScore
+bug_25766, added an option --minRawSubreadScore
$ BASFILE=$DATDIR/lambda_bax.fofn
$ REF=$DATDIR/lambda_ref.fasta
- $ $EXEC $BASFILE $REF -out $TMP1 -minRawSubreadScore 700 -nproc 18
+ $ $EXEC $BASFILE $REF --out $TMP1 --minRawSubreadScore 700 --nproc 18
[INFO]* (glob)
[INFO]* (glob)
$ echo $?
diff --git a/ctest/ccsH5.t b/ctest/ccsH5.t
index b3e0477..63e4217 100644
--- a/ctest/ccsH5.t
+++ b/ctest/ccsH5.t
@@ -3,9 +3,9 @@ Set up
Test using *.ccs.h5 as input
# The results should be exactly the same as
-# blasr $DATDIR/ccsasinput_bas.fofn $DATDIR/ccsasinput.fasta -m 4 -out tmp.m4 -useccsdenovo
+# blasr $DATDIR/ccsasinput_bas.fofn $DATDIR/ccsasinput.fasta -m 4 --out tmp.m4 --useccsdenovo
$ rm -rf $OUTDIR/ccsasinput.m4
- $ $EXEC $DATDIR/ccsasinput.fofn $DATDIR/ccsasinput.fasta -m 4 -out $OUTDIR/ccsasinput.m4
+ $ $EXEC $DATDIR/ccsasinput.fofn $DATDIR/ccsasinput.fasta -m 4 --out $OUTDIR/ccsasinput.m4
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/ccsasinput.m4 $STDDIR/ccsasinput_2014_06_10.m4
diff --git a/ctest/cigarAdjecentIndels.t b/ctest/cigarAdjecentIndels.t
index 698f3a3..c861b8a 100644
--- a/ctest/cigarAdjecentIndels.t
+++ b/ctest/cigarAdjecentIndels.t
@@ -1,8 +1,8 @@
Set up
$ . $TESTDIR/setup.sh
-Without -allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR strings
- $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/noAdjacentIndels.bam -concordant -refineConcordantAlignments -bestn 1 && echo $?
+Without --allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR strings
+ $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/noAdjacentIndels.bam --concordant --refineConcordantAlignments --bestn 1 && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
@@ -15,8 +15,8 @@ Without -allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR
$ grep 'DI' $TMP1 |wc -l
0
-With -allowAdjacentIndels
- $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/allowAdjacentIndels.bam -concordant -bestn 1 -allowAdjacentIndels && echo $?
+With --allowAdjacentIndels
+ $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/allowAdjacentIndels.bam --concordant --bestn 1 --allowAdjacentIndels && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
diff --git a/ctest/concordant.t b/ctest/concordant.t
index 5b3b6aa..e0b0225 100644
--- a/ctest/concordant.t
+++ b/ctest/concordant.t
@@ -1,11 +1,13 @@
Set up
$ . $TESTDIR/setup.sh
-Test -concordant
+Test --concordant
+ $ rm -rf $OUTDIR/concordant_subset.bam
$ rm -rf $OUTDIR/concordant_subset.sam
- $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta -concordant -refineConcordantAlignments -sam -out $OUTDIR/concordant_subset.sam -nproc 12 -holeNumbers 1-10000 -sa $DATDIR/ecoli_reference.sa
+ $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta --concordant --refineConcordantAlignments --bam --out $OUTDIR/concordant_subset.bam --nproc 12 --holeNumbers 1--10000 --sa $DATDIR/ecoli_reference.sa
[INFO]* (glob)
[INFO]* (glob)
+ $ $SAMTOOLS view $OUTDIR/concordant_subset.bam > $OUTDIR/concordant_subset.sam
$ sed -n 6,110864p $OUTDIR/concordant_subset.sam > $OUTDIR/tmp1
$ sort $OUTDIR/tmp1 > $OUTDIR/tmp11
$ sed -n 6,110864p $STDDIR/$UPDATEDATE/concordant_subset.sam > $OUTDIR/tmp2
@@ -15,19 +17,19 @@ Test -concordant
#2014_05_28 --> changelist 135254, use MAX_BAND_SIZE to contrain GuidedAlign
#2014_08_21 --> changelist 138516, added YS, YE, ZM tags.
#2014_08_28 --> changelist 139176, update SAM MD5
-#2014_09_12 --> changelist 140410, changed the default value of '-concordantTemplate' from 'longestsubread' to 'typicalsubread'
+#2014_09_12 --> changelist 140410, changed the default value of '--concordantTemplate' from 'longestsubread' to 'typicalsubread'
#2014_09_17 --> changelist 140573, changed SDPFragment LessThan to make sure blasr compiled with gcc 4.4 and 4.8 can produce identical results.
-#2014_10_16 --> changelist 141378, changed the default value of '-concordantTemplate' from 'typicalsubread' to 'mediansubread'
+#2014_10_16 --> changelist 141378, changed the default value of '--concordantTemplate' from 'typicalsubread' to 'mediansubread'
#2015_03_01 --> changelist 146599, reads from the same movie should have unique readGroupId
#2015_03_28 --> changelist 148101, 148080 updated read group id, 148100 updated TLEN
#2015_04_09 --> changelist 148796, updated read group id
#2015_04_25 --> changelist 149721, update CIGAR string, replace M with X=.
#2015_04_25 --> changelist ?, force refine all concordant alignments
-Test -concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping)
+Test --concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping)
$ FOFN=$DATDIR/FMR1_concordant.fofn
$ REF=$DATDIR/FMR1_130CGG.fasta
- $ $EXEC $FOFN $REF -concordant -refineConcordantAlignments -out $OUTDIR/FMR1_zmw_37927.m4 -m 4 -holeNumbers 37927
+ $ $EXEC $FOFN $REF --concordant --refineConcordantAlignments --out $OUTDIR/FMR1_zmw_37927.m4 -m 4 --holeNumbers 37927
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/FMR1_zmw_37927.m4 $STDDIR/$UPDATEDATE/FMR1_zmw_37927.m4
diff --git a/ctest/dataset.t b/ctest/dataset.t
index 9a14ec8..873d50e 100644
--- a/ctest/dataset.t
+++ b/ctest/dataset.t
@@ -2,7 +2,7 @@ Set up
$ . $TESTDIR/setup.sh
Test dataset.xml as input
- $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -m 4 -out $OUTDIR/chunking.m4 -bestn 1 && echo $?
+ $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -m 4 --out $OUTDIR/chunking.m4 --bestn 1 && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
@@ -10,20 +10,20 @@ Test filters in dataset.xml is respected.
$ cat $OUTDIR/chunking.m4 | wc -l
9
-Test dataset.xml -bam output
- $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/chunking.bam && echo $?
+Test dataset.xml --bam output
+ $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/chunking.bam && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
-Test dataset.xml -concordant
- $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/chunking.concordant.bam -concordant && echo $?
+Test dataset.xml --concordant
+ $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/chunking.concordant.bam --concordant && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
Test dataset with no filters (to make sure that an empty filter does not discard all bam records.)
- $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/nofilter.bam -concordant -bestn 1 && echo $?
+ $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/nofilter.bam --concordant --bestn 1 && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
@@ -32,8 +32,8 @@ Test dataset with no filters (to make sure that an empty filter does not discard
131
-Test dataset with -concordant is on
- $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/bamConcordantRef.fasta -bam -concordant -refineConcordantAlignments -bestn 1 -out $OUTDIR/datasetConcordant.bam -holeNumbers 1898 && echo $?
+Test dataset with --concordant is on
+ $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/bamConcordantRef.fasta --bam --concordant --refineConcordantAlignments --bestn 1 --out $OUTDIR/datasetConcordant.bam --holeNumbers 1898 && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
diff --git a/ctest/deterministic.t b/ctest/deterministic.t
index 8a1214c..16bfe4a 100644
--- a/ctest/deterministic.t
+++ b/ctest/deterministic.t
@@ -13,7 +13,7 @@ and then check if output is determined.
$ outfile=$OUTDIR/$name.m4
$ stdfile=$STDDIR/$name.m4
$ rm -f $outfile
- $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $?
+ $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 --out $outfile && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
@@ -26,7 +26,7 @@ and then check if output is determined.
$ outfile=$OUTDIR/$name.m4
$ stdfile=$STDDIR/$name.m4
$ rm -f $outfile
- $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $?
+ $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 --out $outfile && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
@@ -39,7 +39,7 @@ and then check if output is determined.
$ outfile=$OUTDIR/$name.m4
$ stdfile=$STDDIR/$name.m4
$ rm -f $outfile
- $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $?
+ $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 --out $outfile && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
diff --git a/ctest/ecoli.t b/ctest/ecoli.t
index c6291a2..07940fc 100644
--- a/ctest/ecoli.t
+++ b/ctest/ecoli.t
@@ -2,18 +2,20 @@ Set up
$ . $TESTDIR/setup.sh
Test blasr on ecoli.
-Test blasr with -sam
+Test blasr with --bam
# The following job takes a very long time to finish, let us use a subset of reads instead
#See $STDOUT/ecoli_v1.4.sam for 1.4 output.
# $STDOUT/ecoli_2014_03_28.sam for bug before mapQV for affineAlign/align without QV is fixed.
+ $ rm -rf $OUTDIR/ecoli_subset.bam
$ rm -rf $OUTDIR/ecoli_subset.sam
- $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta -sam -out $OUTDIR/ecoli_subset.sam -nproc 15
+ $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/ecoli_subset.bam --nproc 15
[INFO]* (glob)
[INFO]* (glob)
- $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1-11 > $TMP1
- $ sed -n '5,$ p' $STDDIR/$UPDATEDATE/ecoli_subset.sam | sort | cut -f 1-11 > $TMP2
+ $ $SAMTOOLS view $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam
+ $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1--11 > $TMP1
+ $ sed -n '5,$ p' $STDDIR/$UPDATEDATE/ecoli_subset.sam | sort | cut -f 1--11 > $TMP2
$ diff $TMP1 $TMP2
$ rm $TMP1 $TMP2
# 2015_03_08 --> changelist 148101, 148080 updated read group id; 148100 updated TLEN
diff --git a/ctest/fastMaxInterval.t b/ctest/fastMaxInterval.t
index 323fab1..67a687b 100644
--- a/ctest/fastMaxInterval.t
+++ b/ctest/fastMaxInterval.t
@@ -1,11 +1,11 @@
Set up
$ . $TESTDIR/setup.sh
-Test -fastMaxInterval.
+Test --fastMaxInterval.
$ rm -f $TMP1
$ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
$ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
- $ $EXEC $BASFILE $REFFA -holeNumbers 1-100 -out $TMP1 -fastMaxInterval
+ $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --fastMaxInterval
[INFO] * [blasr] started. (glob)
[INFO] * [blasr] ended. (glob)
$ echo $?
diff --git a/ctest/filtercriteria.t b/ctest/filtercriteria.t
index 9cec9dc..a0f4460 100644
--- a/ctest/filtercriteria.t
+++ b/ctest/filtercriteria.t
@@ -7,12 +7,12 @@ Set up
$ STDDIR=$STDDIR/$NAME
$ mkdir -p $OUTDIR
-Test -minPctSimilarity
+Test --minPctSimilarity
$ I=$DATDIR/tiny_bam.fofn
$ R=$DATDIR/lambdaNEB.fa
$ O=$OUTDIR/min_pct_similarity_90.m4
- $ $EXEC $I $R -out $O -m 4 -minPctSimilarity 90
+ $ $EXEC $I $R --out $O -m 4 --minPctSimilarity 90
[INFO]* (glob)
[INFO]* (glob)
$ echo $?
@@ -21,7 +21,7 @@ Test -minPctSimilarity
0
$ O=$OUTDIR/min_aln_len_1000.m4
- $ $EXEC $I $R -out $O -m 4 -minAlnLength 1000
+ $ $EXEC $I $R --out $O -m 4 --minAlnLength 1000
[INFO]* (glob)
[INFO]* (glob)
$ echo $?
diff --git a/ctest/fofn.t b/ctest/fofn.t
index 348b6f7..dfdbd42 100644
--- a/ctest/fofn.t
+++ b/ctest/fofn.t
@@ -3,7 +3,7 @@ Set up
Test blasr with *.fofn input
# $ rm -rf $OUTDIR/lambda_bax.m4
-# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 -out lambda_bax_tmp.m4 -nproc 15 -minMatch 14
+# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 --out lambda_bax_tmp.m4 --nproc 15 --minMatch 14
# [INFO]* (glob)
# [INFO]* (glob)
# $ sort lambda_bax_tmp.m4 > $OUTDIR/lambda_bax.m4
@@ -11,7 +11,7 @@ Test blasr with *.fofn input
# This test takes a long time, use a subset instad.
$ rm -rf $OUTDIR/lambda_bax_subset.m4
- $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/lambda_bax_tmp_subset.m4 -nproc 15 -minMatch 14 -holeNumbers 1-1000 -sa $DATDIR/lambda_ref.sa
+ $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/lambda_bax_tmp_subset.m4 --nproc 15 --minMatch 14 --holeNumbers 1--1000 --sa $DATDIR/lambda_ref.sa
[INFO]* (glob)
[INFO]* (glob)
$ sort $OUTDIR/lambda_bax_tmp_subset.m4 > $OUTDIR/lambda_bax_subset.m4
diff --git a/ctest/hitpolicy.t b/ctest/hitpolicy.t
index 2a90e19..becf826 100644
--- a/ctest/hitpolicy.t
+++ b/ctest/hitpolicy.t
@@ -13,7 +13,7 @@ Set up
$ X=$STDDIR/hitpolicy_all.m4
Test hitpolicy all
- $ $EXEC $I $R -out $O -m 4 -hitPolicy all
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy all
[INFO]* (glob)
[INFO]* (glob)
$ echo $?
@@ -24,7 +24,7 @@ Test hitpolicy all
Test hitpolicy allbest
$ O=$OUTDIR/hitpolicy_allbest.m4
$ X=$STDDIR/hitpolicy_allbest.m4
- $ $EXEC $I $R -out $O -m 4 -hitPolicy allbest && sort $O > $TMP1 && mv $TMP1 $O
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy allbest && sort $O > $TMP1 && mv $TMP1 $O
[INFO]* (glob)
[INFO]* (glob)
$ echo $?
@@ -37,10 +37,10 @@ Test hitpolicy random
$ O=$OUTDIR/hitpolicy_random.m4
$ O2=$OUTDIR/hitpolicy_random_2.m4
$ X=$STDDIR/hitpolicy_random.m4
- $ $EXEC $I $R -out $O -m 4 -hitPolicy random -randomSeed 1
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy random --randomSeed 1
[INFO]* (glob)
[INFO]* (glob)
- $ $EXEC $I $R -out $O2 -m 4 -hitPolicy random -randomSeed 1
+ $ $EXEC $I $R --out $O2 -m 4 --hitPolicy random --randomSeed 1
[INFO]* (glob)
[INFO]* (glob)
$ sort $O > $TMP1 && mv $TMP1 $O
@@ -52,10 +52,10 @@ Test hitpolicy randombest bam inputs, nproc > 1, fixed seed
$ O=$OUTDIR/hitpolicy_randombest_bam_in.m4
$ O2=$OUTDIR/hitpolicy_randombest_bam_in_2.m4
$ X=$STDDIR/hitpolicy_randombest_bam_in.m4
- $ $EXEC $I $R -out $O -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10
[INFO]* (glob)
[INFO]* (glob)
- $ $EXEC $I $R -out $O2 -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10
+ $ $EXEC $I $R --out $O2 -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10
[INFO]* (glob)
[INFO]* (glob)
$ sort $O > $TMP1 && mv $TMP1 $O
@@ -67,7 +67,7 @@ Test hitpolicy randombest bax inputs, nproc > 1, fixed seed
$ I=$DATDIR/tiny_bax.fofn
$ O=$OUTDIR/hitpolicy_randombest_bax_in.m4
$ X=$STDDIR/hitpolicy_randombest_bax_in.m4
- $ $EXEC $I $R -out $O -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10
[INFO]* (glob)
[INFO]* (glob)
$ sort $O > $TMP1 && mv $TMP1 $O
@@ -78,7 +78,7 @@ Test hitpolicy randombest fasta inputs, nproc > 1, fixed seed
$ I=$DATDIR/tiny_fasta.fofn
$ O=$OUTDIR/hitpolicy_randombest_fasta_in.m4
$ X=$STDDIR/hitpolicy_randombest_fasta_in.m4
- $ $EXEC $I $R -out $O -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10
[INFO]* (glob)
[INFO]* (glob)
$ sort $O > $TMP1 && mv $TMP1 $O
@@ -88,7 +88,7 @@ Test hitpolicy randombest fasta inputs, nproc > 1, fixed seed
Test hitpolicy leftmost
$ O=$OUTDIR/hitpolicy_leftmost.m4
$ X=$STDDIR/hitpolicy_leftmost.m4
- $ $EXEC $I $R -out $O -m 4 -hitPolicy leftmost -nproc 10
+ $ $EXEC $I $R --out $O -m 4 --hitPolicy leftmost --nproc 10
[INFO]* (glob)
[INFO]* (glob)
$ # target is lambda x 6, leftmost -> only map to the very first x.
diff --git a/ctest/holeNumbers.t b/ctest/holeNumbers.t
index 427f31a..7f2e292 100644
--- a/ctest/holeNumbers.t
+++ b/ctest/holeNumbers.t
@@ -1,9 +1,9 @@
Set up
$ . $TESTDIR/setup.sh
-Test -holeNumbers
+Test --holeNumbers
$ rm -f $OUTDIR/holeNumbers.m4
- $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/holeNumbers.m4 -holeNumbers 14798,55000-55100 -nproc 8
+ $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/holeNumbers.m4 --holeNumbers 14798,55000--55100 --nproc 8
[INFO]* (glob)
[INFO]* (glob)
$ sort $OUTDIR/holeNumbers.m4 > $TMP1
diff --git a/ctest/m0-5.t b/ctest/m0-5.t
index 84182b3..34c12e0 100644
--- a/ctest/m0-5.t
+++ b/ctest/m0-5.t
@@ -3,31 +3,31 @@ Set up
Test blasr with -m 0 ~ 5
$ rm -rf $OUTDIR/read.m0
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 0 -out $OUTDIR/read.m0
+ $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 0 --out $OUTDIR/read.m0
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/read.m0 $STDDIR/read.m0
$ rm -rf $OUTDIR/read.m1
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 1 -out $OUTDIR/read.m1
+ $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 1 --out $OUTDIR/read.m1
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/read.m1 $STDDIR/read_2014_05_29.m1
$ rm -rf $OUTDIR/read.m2
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 2 -out $OUTDIR/read.m2
+ $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 2 --out $OUTDIR/read.m2
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/read.m2 $STDDIR/read.m2
$ rm -rf $OUTDIR/read.m3
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 3 -out $OUTDIR/read.m3
+ $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 3 --out $OUTDIR/read.m3
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/read.m3 $STDDIR/read.m3
$ rm -rf $OUTDIR/read.m4
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 4 -out $OUTDIR/read.m4
+ $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 4 --out $OUTDIR/read.m4
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/read.m4 $STDDIR/read.m4
diff --git a/ctest/multipart.t b/ctest/multipart.t
index 2cf6e81..5e824d7 100644
--- a/ctest/multipart.t
+++ b/ctest/multipart.t
@@ -6,7 +6,7 @@ contain any /PulseData, instead contains /MultiPart/Parts.
$ rm -f $TMP1
$ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
$ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
- $ $EXEC $BASFILE $REFFA -holeNumbers 1-100 -out $TMP1
+ $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1
[INFO] * [blasr] started. (glob)
[INFO] * [blasr] ended. (glob)
$ echo $?
diff --git a/ctest/noSplitSubreads.t b/ctest/noSplitSubreads.t
index 3667cbd..ed4a192 100644
--- a/ctest/noSplitSubreads.t
+++ b/ctest/noSplitSubreads.t
@@ -1,9 +1,9 @@
Set up
$ . $TESTDIR/setup.sh
-Test blasr with -noSplitSubreads
+Test blasr with --noSplitSubreads
# $ rm -rf $OUTDIR/lambda_bax_noSplitSubreads.m4
-# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -noSplitSubreads -m 4 -out lambda_bax_noSplitSubreads_tmp.m4 -nproc 15
+# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta --noSplitSubreads -m 4 --out lambda_bax_noSplitSubreads_tmp.m4 --nproc 15
# [INFO]* (glob)
# [INFO]* (glob)
# $ sort lambda_bax_noSplitSubreads_tmp.m4 > $OUTDIR/lambda_bax_noSplitSubreads.m4
@@ -11,7 +11,7 @@ Test blasr with -noSplitSubreads
# This test takes a long time, use a subset instad.
$ rm -rf $OUTDIR/lambda_bax_noSplitSubreads_subset.m4
- $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -noSplitSubreads -m 4 -out $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 -nproc 15 -holeNumbers 1-1000 -sa $DATDIR/lambda_ref.sa
+ $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta --noSplitSubreads -m 4 --out $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 --nproc 15 --holeNumbers 1--1000 --sa $DATDIR/lambda_ref.sa
[INFO]* (glob)
[INFO]* (glob)
$ sort $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 > $OUTDIR/lambda_bax_noSplitSubreads_subset.m4
diff --git a/ctest/samNM.t b/ctest/samNM.t
deleted file mode 100644
index 93ae89c..0000000
--- a/ctest/samNM.t
+++ /dev/null
@@ -1,13 +0,0 @@
-Set up
- $ . $TESTDIR/setup.sh
-
-Test Sam out nm tag
- $ rm -rf $OUTDIR/read.sam
- $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -sam -out $OUTDIR/read.sam
- [INFO]* (glob)
- [INFO]* (glob)
- $ tail -n+5 $OUTDIR/read.sam |cut -f 21
- NM:i:2
- NM:i:3
- NM:i:2
- NM:i:4
diff --git a/ctest/setup.sh b/ctest/setup.sh
index f75e4fb..8ae52b1 100755
--- a/ctest/setup.sh
+++ b/ctest/setup.sh
@@ -6,7 +6,8 @@ OUTDIR=$CURDIR/out
STDDIR=$REMOTEDIR/stdout
# Set up the executable: blasr.
-EXEC=${BLASR_PATH}/blasr
+#EXEC=${BLASR_PATH}/blasr
+EXEC=blasr
# Define tmporary files
TMP1=$OUTDIR/$$.tmp.out
diff --git a/ctest/unaligned.t b/ctest/unaligned.t
index 3340b0c..08b47f2 100644
--- a/ctest/unaligned.t
+++ b/ctest/unaligned.t
@@ -2,7 +2,7 @@ Set up
$ . $TESTDIR/setup.sh
Test dataset.xml as input
- $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -unaligned $OUTDIR/unaligned.txt -noPrintUnalignedSeqs -concordant 1>/dev/null && echo $?
+ $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta --unaligned $OUTDIR/unaligned.txt --noPrintUnalignedSeqs --concordant 1>/dev/null && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
@@ -13,7 +13,7 @@ Test dataset.xml as input
m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/12033_13456
m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/13519_14067
- $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta -unaligned $OUTDIR/unaligned.txt -noPrintUnalignedSeqs 1>/dev/null && echo $?
+ $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta --unaligned $OUTDIR/unaligned.txt --noPrintUnalignedSeqs 1>/dev/null && echo $?
[INFO]* (glob)
[INFO]* (glob)
0
diff --git a/ctest/useccsallBestN1.t b/ctest/useccsallBestN1.t
index 32fafc7..659b3b8 100644
--- a/ctest/useccsallBestN1.t
+++ b/ctest/useccsallBestN1.t
@@ -1,10 +1,11 @@
Set up
$ . $TESTDIR/setup.sh
-Test -useccsall with bestn = 1
- $ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta -bestn 1 -useccsall -sam -out $OUTDIR/useccsall.sam -holeNumbers 76772
+Test --useccsall with bestn = 1
+ $ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta --bestn 1 --useccsall --bam --out $OUTDIR/useccsall.bam --holeNumbers 76772
[INFO]* (glob)
[INFO]* (glob)
+ $ $SAMTOOLS view $OUTDIR/useccsall.bam > $OUTDIR/useccsall.sam
$ sed -n '9,$ p' $OUTDIR/useccsall.sam |cut -f 1-4 > $TMP1
$ sed -n '9,$ p' $STDDIR/$UPDATEDATE/useccsall.sam | cut -f 1-4 > $TMP2
$ diff $TMP1 $TMP2
diff --git a/ctest/useccsallLargeGenome.t b/ctest/useccsallLargeGenome.t
index ac93834..dfded47 100644
--- a/ctest/useccsallLargeGenome.t
+++ b/ctest/useccsallLargeGenome.t
@@ -1,13 +1,13 @@
Set up
$ . $TESTDIR/setup.sh
-Test -useccsall with Large genome.
+Test --useccsall with Large genome.
$ BASFILE=/mnt/data3/vol53/2450530/0014/Analysis_Results/m130507_052228_42161_c100519212550000001823079909281305_s1_p0.3.bax.h5
$ REFDIR=/mnt/secondary/Smrtpipe/repository/hg19_M_sorted/sequence
$ REFFA=$REFDIR/hg19_M_sorted.fasta
$ REFSA=$REFDIR/hg19_M_sorted.fasta.sa
$ OUTFILE=$OUTDIR/intflow.m4
- $ $EXEC $BASFILE $REFFA -out $OUTFILE -m 4 -sa $REFSA -holeNumbers 109020
+ $ $EXEC $BASFILE $REFFA --out $OUTFILE -m 4 --sa $REFSA --holeNumbers 109020
[INFO]* (glob)
[INFO]* (glob)
$ sort $OUTFILE > $TMP1 && sort $STDDIR/intflow_2014_06_10.m4 > $TMP2 && diff $TMP1 $TMP2 && echo $?
diff --git a/ctest/verbose.t b/ctest/verbose.t
index 19e508a..a36074c 100644
--- a/ctest/verbose.t
+++ b/ctest/verbose.t
@@ -2,7 +2,7 @@ Set up
$ . $TESTDIR/setup.sh
Test alignment score
- $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -holeNumbers 1-200 -V 3 > $TMP1
+ $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta --holeNumbers 1--200 -V 3 > $TMP1
[INFO]* (glob)
[INFO]* (glob)
$ echo $?
diff --git a/iblasr/BlasrHeaders.h b/iblasr/BlasrHeaders.h
index 8a08c27..80d1fb1 100644
--- a/iblasr/BlasrHeaders.h
+++ b/iblasr/BlasrHeaders.h
@@ -24,6 +24,7 @@ using namespace std;
#include <libconfig.h>
#ifdef USE_PBBAM
#include <pbbam/BamWriter.h>
+#include <pbbam/SamWriter.h>
#endif
#include <CCSSequence.hpp>
diff --git a/iblasr/BlasrUtils.hpp b/iblasr/BlasrUtils.hpp
index ea7e4de..0176733 100644
--- a/iblasr/BlasrUtils.hpp
+++ b/iblasr/BlasrUtils.hpp
@@ -52,6 +52,7 @@ void SumMismatches(SMRTSequence &read,
T_AlignmentCandidate &alignment,
int mismatchScore,
int fullIntvStart, int fullIntvEnd,
+ MappingParameters ¶ms,
int &sum);
//FIXME: move to class T_AlignmentCandidate
@@ -120,7 +121,7 @@ void PrintAlignment(T_AlignmentCandidate &alignment,
ostream &outFile
#ifdef USE_PBBAM
, SMRTSequence &subread
- , PacBio::BAM::BamWriter * bamWriterPtr
+ , PacBio::BAM::IRecordWriter * bamWriterPtr
#endif
);
@@ -131,7 +132,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs,
AlignmentContext alignmentContext,
#ifdef USE_PBBAM
SMRTSequence &subread,
- PacBio::BAM::BamWriter * bamWriterPtr,
+ PacBio::BAM::IRecordWriter * bamWriterPtr,
#endif
MappingSemaphores & semaphores);
@@ -162,7 +163,7 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
MappingParameters & params,
vector<SMRTSequence> & subreads,
#ifdef USE_PBBAM
- PacBio::BAM::BamWriter * bamWriterPtr,
+ PacBio::BAM::IRecordWriter * bamWriterPtr,
#endif
MappingSemaphores & semaphores);
diff --git a/iblasr/BlasrUtilsImpl.hpp b/iblasr/BlasrUtilsImpl.hpp
index 6839590..49ac6b3 100644
--- a/iblasr/BlasrUtilsImpl.hpp
+++ b/iblasr/BlasrUtilsImpl.hpp
@@ -222,7 +222,7 @@ void StoreMapQVs(SMRTSequence &read,
// bug 24363, use updated SumMismatches to compute mismatch score when
// no QV is available.
SumMismatches(read, *alignmentPtrs[*partIt], 15,
- partitionBeginPos[p], partitionEndPos[p], mismatchSum);
+ partitionBeginPos[p], partitionEndPos[p], params, mismatchSum);
}
//
// Random sequence can be aligned with about 50% similarity due
@@ -345,13 +345,14 @@ void SumMismatches(SMRTSequence &read,
T_AlignmentCandidate &alignment,
int mismatchScore,
int fullIntvStart, int fullIntvEnd,
+ MappingParameters ¶ms,
int &sum)
{
int alnStart, alnEnd;
alignment.GetQIntervalOnForwardStrand(alnStart, alnEnd);
int p;
sum = 0;
- if (read.substitutionQV.Empty() == false) {
+ if (not params.ignoreQualities and read.substitutionQV.Empty() == false) {
for (p = fullIntvStart; p < alnStart; p++) {
sum += read.substitutionQV[p];
}
@@ -953,7 +954,7 @@ void PrintAlignment(T_AlignmentCandidate &alignment,
ostream &outFile
#ifdef USE_PBBAM
, SMRTSequence & subread
- , PacBio::BAM::BamWriter * bamWriterPtr
+ , PacBio::BAM::IRecordWriter * bamWriterPtr
#endif
) {
try {
@@ -1013,7 +1014,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs,
AlignmentContext alignmentContext,
#ifdef USE_PBBAM
SMRTSequence &subread,
- PacBio::BAM::BamWriter * bamWriterPtr,
+ PacBio::BAM::IRecordWriter * bamWriterPtr,
#endif
MappingSemaphores & semaphores) {
if (params.nProc > 1) {
@@ -1130,7 +1131,7 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
MappingParameters & params,
vector<SMRTSequence> & subreads,
#ifdef USE_PBBAM
- PacBio::BAM::BamWriter * bamWriterPtr,
+ PacBio::BAM::IRecordWriter * bamWriterPtr,
#endif
MappingSemaphores & semaphores)
{
diff --git a/iblasr/MappingParameters.h b/iblasr/MappingParameters.h
index ed64e80..271fce4 100644
--- a/iblasr/MappingParameters.h
+++ b/iblasr/MappingParameters.h
@@ -88,6 +88,7 @@ public:
bool printSAM;
bool cigarUseSeqMatch;
bool printBAM;
+ bool sam_via_bam; // for SAM output via pbbam using IRecordWriter
bool storeMapQV;
bool useRandomSeed;
int randomSeed;
@@ -271,6 +272,7 @@ public:
readsFileIndex = 0;
printSAM = false;
printBAM = false;
+ sam_via_bam = false;
useRandomSeed = false;
randomSeed = 0;
placeRandomly = false;
@@ -557,10 +559,6 @@ public:
if (randomSeed != 0) {
useRandomSeed = true;
}
- if (printSAM) {
- cerr << "ERROR: --sam is no longer supported, use --bam, then translate from .bam to .sam" << endl;
- exit(1);
- }
//
// Parse the clipping.
//
@@ -581,7 +579,43 @@ public:
exit(1);
}
- if (printBAM) {
+ if (printSAM) { // since sam is printed via bam we need to use ifndef USE_PBBAM here
+#ifndef USE_PBBAM
+ REQUIRE_PBBAM_ERROR();
+#else
+ printSAM = false;
+ printBAM = true;
+ sam_via_bam = true; // set to true for constructors and to avoid entering if (printBAM
+ cigarUseSeqMatch = true; // ALWAYS true for BAM
+ printFormat = BAM; // Not sure for sam_via_bam
+ samQVList.SetDefaultQV();
+ printSAMQV = true;
+ if (clipping != SAMOutput::soft) {
+ // Only support two clipping methods: soft or subread.
+ clipping = SAMOutput::subread;
+ }
+ // Turn on fa fa -> bam pipe
+ /*
+ if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) {
+ // bax|fasta|fastq -> bam paths are turned off by default
+ cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM or DATASET files." << endl;
+ exit(1);
+ }
+ */
+ if (outFileName == "") {
+ cout << "ERROR, SAM output file must be specified." << endl;
+ exit(1);
+ }
+ // VR Need to see what happens if printing SAM
+ // VR Check with Derek regarding sam_via_bam
+ if (outputByThread) {
+ cout << "ERROR, could not output alignments by threads in BAM format." << endl;
+ exit(1);
+ }
+#endif
+ }
+
+ if (printBAM && !sam_via_bam) { // Need to check settings for SAM,
#ifndef USE_PBBAM
REQUIRE_PBBAM_ERROR();
#else
@@ -594,15 +628,20 @@ public:
// Only support two clipping methods: soft or subread.
clipping = SAMOutput::subread;
}
+ // Turn on fa fa -> bam pipe
+ /*
if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) {
// bax|fasta|fastq -> bam paths are turned off by default
cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM or DATASET files." << endl;
exit(1);
}
+ */
if (outFileName == "") {
cout << "ERROR, BAM output file must be specified." << endl;
exit(1);
}
+ // VR Need to see what happens if printing SAM
+ // VR Check with Derek regarding sam_via_bam
if (outputByThread) {
cout << "ERROR, could not output alignments by threads in BAM format." << endl;
exit(1);
diff --git a/iblasr/RegisterBlasrOptions.h b/iblasr/RegisterBlasrOptions.h
index 0425db5..af7770a 100644
--- a/iblasr/RegisterBlasrOptions.h
+++ b/iblasr/RegisterBlasrOptions.h
@@ -466,12 +466,12 @@ const string BlasrDiscussion(void) {
ss << "NAME" << endl
<< " blasr - Map SMRT Sequences to a reference genome."<< endl << endl
<< "SYNOPSIS" << endl
- << " blasr reads.bam genome.fasta -bam -out out.bam" << endl << endl
+ << " blasr reads.bam genome.fasta --bam --out out.bam" << endl << endl
<< " blasr reads.fasta genome.fasta " << endl << endl
- << " blasr reads.fasta genome.fasta -sa genome.fasta.sa" << endl << endl
- << " blasr reads.bax.h5 genome.fasta [-sa genome.fasta.sa] " << endl << endl
- << " blasr reads.bax.h5 genome.fasta -sa genome.fasta.sa -maxScore -100 -minMatch 15 ... " << endl << endl
- << " blasr reads.bax.h5 genome.fasta -sa genome.fasta.sa -nproc 24 -out alignment.out ... " << endl << endl
+ << " blasr reads.fasta genome.fasta --sa genome.fasta.sa" << endl << endl
+ << " blasr reads.bax.h5 genome.fasta [--sa genome.fasta.sa] " << endl << endl
+ << " blasr reads.bax.h5 genome.fasta --sa genome.fasta.sa --maxScore 100 --minMatch 15 ... " << endl << endl
+ << " blasr reads.bax.h5 genome.fasta --sa genome.fasta.sa --nproc 24 --out alignment.out ... " << endl << endl
<< "DESCRIPTION " << endl
<< " blasr is a read mapping program that maps reads to positions " << endl
<< " in a genome by clustering short exact matches between the read and" << endl
diff --git a/makefile b/makefile
index 9182630..e2d53fc 100644
--- a/makefile
+++ b/makefile
@@ -9,6 +9,7 @@ foo:
echo $(MAKEFILE_LIST)
echo ${SRCDIR}
+GET_SHA1 := $(shell git -C ${SRCDIR} describe --always --dirty='*')
CXXFLAGS += -O3 -g -DSHA1_7=\"${GET_SHA1}\"
CXXOPTS += \
-std=c++0x -pedantic \
@@ -28,7 +29,6 @@ override CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
SRCS := Blasr.cpp
OBJS := ${SRCS:.cpp=.o}
DEPS := ${SRCS:.cpp=.d}
-GET_SHA1 := $(shell git rev-parse --short HEAD)
override BLASR_PATH=${SRCDIR}/
export BLASR_PATH
diff --git a/rules.mk b/rules.mk
index c705565..a7adcc7 100644
--- a/rules.mk
+++ b/rules.mk
@@ -33,5 +33,5 @@ LDLIBS+= \
# We repeat LIBPBIHDF_LIBFLAGS because of a circular dependency. See #77.
CPPFLAGS+=$(patsubst %,-I%,${INCDIRS})
-CPPFLAGS+=$(patsubst %,-isystem%,${SYSINCDIRS})
+CPPFLAGS+=$(patsubst %,-I%,${SYSINCDIRS})
LDFLAGS+=$(patsubst %,-L%,${LIBDIRS})
diff --git a/utils/SamFilter.cpp b/utils/SamFilter.cpp
index c484618..30653ca 100644
--- a/utils/SamFilter.cpp
+++ b/utils/SamFilter.cpp
@@ -271,7 +271,7 @@ int main(int argc, char* argv[]) {
string holeNumberStr;
Ranges holeNumberRanges;
- clp.RegisterStringOption("holeNumbers", &holeNumberStr,
+ clp.RegisterStringOption("-holeNumbers", &holeNumberStr,
"A string of comma-delimited hole number ranges to output hits, "
"such as '1,2,10-12'. "
"This requires hit titles to be in SMRT read title format.");
diff --git a/utils/bam2bax/makefile b/utils/bam2bax/makefile
index f4b4245..277e605 100644
--- a/utils/bam2bax/makefile
+++ b/utils/bam2bax/makefile
@@ -7,7 +7,8 @@ include ${SRCDIR}/../../rules.mk
all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h
@mkdir -p ${CURDIR}/build && \
cd ${CURDIR}/build && \
- cmake -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \
+ cmake -DBOOST_ROOT=${BOOST_ROOT} \
+ -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \
-DHTSLIB_INCLUDE_DIRS=${HTSLIB_INC} \
-DPacBioBAM_LIBRARIES=${PBBAM_LIB}/libpbbam${SH_LIB_EXT} \
-DHTSLIB_LIBRARIES=${HTSLIB_LIB}/libhts${SH_LIB_EXT} \
diff --git a/utils/bam2bax/src/Bam2BaxConverterImpl.hpp b/utils/bam2bax/src/Bam2BaxConverterImpl.hpp
index 016b3fc..ab11a8d 100644
--- a/utils/bam2bax/src/Bam2BaxConverterImpl.hpp
+++ b/utils/bam2bax/src/Bam2BaxConverterImpl.hpp
@@ -50,20 +50,6 @@ bool Bam2BaxConverter<T_HDFWRITER>::ConvertFile(void) {
}
if (not settings_.ignoreQV) writer.WriteFakeDataSets();
for (auto error: writer.Errors()) { AddErrorMessage(error); }
- } else if (not settings_.polymeraseBamFilename.empty()) {
- // Read polymerase reads from polymerase.bam directly.
- PacBio::BAM::EntireFileQuery query(bamfile);
- for (auto record: query) {
- SMRTSequence smrt;
- smrt.Copy(record, true);
- RegionAnnotation ra(record.HoleNumber(),
- RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType::HQREGION, regionTypes),
- 0, 0, 0);
- std::vector<RegionAnnotation> ras({ra});
- if (not writer.WriteOneZmw(smrt, ras) or not writer.Errors().empty()) { break; }
- }
- if (not settings_.ignoreQV) writer.WriteFakeDataSets();
- for (auto error: writer.Errors()) { AddErrorMessage(error); }
}
return errors_.empty();
diff --git a/utils/bam2bax/src/Bam2BaxMain.cpp b/utils/bam2bax/src/Bam2BaxMain.cpp
index e8e3895..f29f83e 100644
--- a/utils/bam2bax/src/Bam2BaxMain.cpp
+++ b/utils/bam2bax/src/Bam2BaxMain.cpp
@@ -21,8 +21,8 @@ int main(int argc, char* argv[])
auto ioGroup = optparse::OptionGroup(parser, "Input/output files");
ioGroup.add_option("")
.dest(Settings::Option::input_)
- .metavar("movie.subreads.bam movie.scraps.bam | movie.polymerase.bam")
- .help("Input a movie.polymerase.bam. Or a movie.subreads.bam and a movie.scraps.bam");
+ .metavar("movie.subreads.bam movie.scraps.bam")
+ .help("A movie.subreads.bam and a movie.scraps.bam");
ioGroup.add_option("--trace")
.dest(Settings::Option::trace_)
.metavar("movie.trc.h5")
diff --git a/utils/bam2bax/src/Bam2PlxMain.cpp b/utils/bam2bax/src/Bam2PlxMain.cpp
index 8d0f48f..3be76a0 100644
--- a/utils/bam2bax/src/Bam2PlxMain.cpp
+++ b/utils/bam2bax/src/Bam2PlxMain.cpp
@@ -21,8 +21,8 @@ int main(int argc, char* argv[])
auto ioGroup = optparse::OptionGroup(parser, "Input/output files");
ioGroup.add_option("")
.dest(Settings::Option::input_)
- .metavar("movie.subreads.bam movie.scraps.bam | movie.polymerase.bam")
- .help("Input a movie.polymerase.bam. Or a movie.subreads.bam and a movie.scraps.bam");
+ .metavar("movie.subreads.bam movie.scraps.bam")
+ .help("A movie.subreads.bam and a movie.scraps.bam");
ioGroup.add_option("-o")
.dest(Settings::Option::output_)
.metavar("STRING")
diff --git a/utils/bam2bax/src/Converter.cpp b/utils/bam2bax/src/Converter.cpp
index 1926b19..8fa98b6 100644
--- a/utils/bam2bax/src/Converter.cpp
+++ b/utils/bam2bax/src/Converter.cpp
@@ -7,8 +7,6 @@ Converter::Converter(Settings const& settings)
std::string infn = settings_.subreadsBamFilename;
- if (infn.empty()) infn = settings_.polymeraseBamFilename;
-
bamfile_ = new PacBio::BAM::BamFile(infn);
PacBio::BAM::BamHeader bamheader = bamfile_->Header();
@@ -75,20 +73,6 @@ bool Converter::Run() {
}
if (not settings_.ignoreQV) writer_->WriteFakeDataSets();
for (auto error: writer_->Errors()) { AddErrorMessage(error); }
- } else if (not settings_.polymeraseBamFilename.empty()) {
- // Read polymerase reads from polymerase.bam directly.
- PacBio::BAM::EntireFileQuery query(*bamfile_);
- for (auto record: query) {
- SMRTSequence smrt;
- smrt.Copy(record, true);
- RegionAnnotation ra(record.HoleNumber(),
- RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType::HQREGION, regionTypes),
- 0, 0, 0);
- std::vector<RegionAnnotation> ras({ra});
- if (not writer_->WriteOneZmw(smrt, ras) or not writer_->Errors().empty()) { break; }
- }
- if (not settings_.ignoreQV) writer_->WriteFakeDataSets();
- for (auto error: writer_->Errors()) { AddErrorMessage(error); }
}
return errors_.empty();
diff --git a/utils/bam2bax/src/Settings.cpp b/utils/bam2bax/src/Settings.cpp
index 5d54d56..a5a68d0 100644
--- a/utils/bam2bax/src/Settings.cpp
+++ b/utils/bam2bax/src/Settings.cpp
@@ -154,11 +154,7 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser,
// input
settings.inputBamFilenames = parser.args();
- if (settings.inputBamFilenames.size() == 1) {
- settings.polymeraseBamFilename = settings.inputBamFilenames[0];
- if (settings.polymeraseBamFilename.find("polymerase.bam") == std::string::npos)
- settings.errors_.push_back("missing input *.polymerase.bam.");
- } else if (settings.inputBamFilenames.size() == 2) {
+ if (settings.inputBamFilenames.size() == 2) {
settings.subreadsBamFilename = settings.inputBamFilenames[0];
settings.scrapsBamFilename = settings.inputBamFilenames[1];
if (settings.subreadsBamFilename.find("subreads.bam") == std::string::npos)
@@ -166,7 +162,7 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser,
if (settings.scrapsBamFilename.find("scraps.bam") == std::string::npos)
settings.errors_.push_back("missing input *.scraps.bam.");
} else {
- settings.errors_.push_back("missing input (polymerase.bam or subreads+scraps.bam.");
+ settings.errors_.push_back("missing input subreads+scraps.bam.");
}
if (options.is_set(Settings::Option::trace_)) {
@@ -180,8 +176,6 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser,
if (settings.outputBaxPrefix.empty()) { // if output prefix not set.
if (not settings.subreadsBamFilename.empty()) {
settings.outputBaxPrefix = internal::GetMovienameFromFilename(settings.subreadsBamFilename);
- } else if (not settings.polymeraseBamFilename.empty()) {
- settings.outputBaxPrefix = internal::GetMovienameFromFilename(settings.polymeraseBamFilename);
}
}
@@ -219,8 +213,6 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser,
cerr << " subreads : " << settings.subreadsBamFilename << endl;
if (not settings.scrapsBamFilename.empty())
cerr << " scraps : " << settings.scrapsBamFilename << endl;
- if (not settings.polymeraseBamFilename.empty())
- cerr << " polymerase: " << settings.polymeraseBamFilename << endl;
if (not settings.traceFilename.empty())
cerr << " trace : " << settings.traceFilename << endl;
cerr << "Output h5 : " << settings.outputBaxFilename << endl
diff --git a/utils/bam2bax/tests/cram/bam2bax.t b/utils/bam2bax/tests/cram/bam2bax.t
index 735ff3e..d7b94f0 100755
--- a/utils/bam2bax/tests/cram/bam2bax.t
+++ b/utils/bam2bax/tests/cram/bam2bax.t
@@ -65,18 +65,6 @@ diff input with output
$ diff $I_SR_SAM.tmp $O_SR_SAM.tmp || echo I.subreads.bam and O.subreads.bam are not identical
$ diff $I_SC_SAM.tmp $O_SC_SAM.tmp || echo I.subreads.bam and O.subreads.bam are not identical
-
-polymerase.bam to bax.h5
- $ $BAM2BAX /pbi/dept/secondary/siv/testdata/bam2bax/bam2plx/small.polymerase.bam -o Analysis_Results/polymerase 1>/dev/null 2>/dev/null && echo $?
- 0
-
- $ h5ls -r Analysis_Results/polymerase.bax.h5 |grep PulseData/Regions |wc -l
- 1
-
- $ h5dump -d PulseData/Regions Analysis_Results/polymerase.bax.h5 |grep '133229, 2, 0, 0, 0' |wc -l
- 1
-
-
ZMW with no HQ region
$ $BAM2BAX /pbi/dept/secondary/siv/testdata/bam2bax/all_lq/all_lq.subreads.bam /pbi/dept/secondary/siv/testdata/bam2bax/all_lq/all_lq.scraps.bam -o Analysis_Results/all_lq 1>/dev/null 2>/dev/null && echo $?
0
@@ -84,3 +72,13 @@ ZMW with no HQ region
$ h5dump -d /PulseData/Regions Analysis_Results/all_lq.bax.h5 |grep "(0,0): 47775928, 2, 0, 0, 700"
(0,0): 47775928, 2, 0, 0, 700,
+Check ZMW HoleXY
+ $ HOLEXY_PATH=/pbi/dept/secondary/siv/testdata/bam2bax/holexy
+ $ $BAM2BAX $HOLEXY_PATH/rt_m54075_160614_021811_4194561.subreads.bam $HOLEXY_PATH/rt_m54075_160614_021811_4194561.scraps.bam -o Analysis_Results/test_m54075_160614_021811_4194561 1>/dev/null 2>/dev/null && echo $?
+ 0
+
+ $ h5dump -d /PulseData/BaseCalls/ZMW/HoleXY $HOLEXY_PATH/poc_m54075_160614_021811_4194561.bax.h5 | grep "(0,0): "
+ (0,0): 64, 257
+ $ h5dump -d /PulseData/BaseCalls/ZMW/HoleXY Analysis_Results/test_m54075_160614_021811_4194561.bax.h5 | grep "(0,0): "
+ (0,0): 64, 257
+
diff --git a/utils/bam2bax/tests/cram/bam2plx.t b/utils/bam2bax/tests/cram/bam2plx.t
index 07669c4..e0d4235 100755
--- a/utils/bam2bax/tests/cram/bam2plx.t
+++ b/utils/bam2bax/tests/cram/bam2plx.t
@@ -115,15 +115,15 @@ Check PulseCalls/MeanSignal
DATATYPE H5T_STD_U16LE
DATASPACE SIMPLE { ( 15581, 4 ) / ( H5S_UNLIMITED, 4 ) }
DATA {
- (0,0): 0, 0, 0, 112,
- (1,0): 0, 75, 0, 0,
- (2,0): 0, 0, 0, 65,
- (3,0): 0, 60, 0, 0,
- (4,0): 0, 0, 0, 62,
+ (0,0): 0, 0, 0, 11,
+ (1,0): 0, 8, 0, 0,
+ (2,0): 0, 0, 0, 7,
+ (3,0): 0, 6, 0, 0,
+ (4,0): 0, 0, 0, 6,
Check PulseCalls/MidSignal
$ h5dump -d /PulseData/PulseCalls/MidSignal $O_PLX_H5 |grep "(0):"
- (0): 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 0, 82,
+ (0): 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0,
Check PulseCalls/WidthInFrames
$ h5dump -d /PulseData/PulseCalls/WidthInFrames $O_PLX_H5 | grep "(0):"
diff --git a/utils/bax2bam/CMakeLists.txt b/utils/bax2bam/CMakeLists.txt
index 8ffb153..14f4697 100644
--- a/utils/bax2bam/CMakeLists.txt
+++ b/utils/bax2bam/CMakeLists.txt
@@ -77,6 +77,8 @@ if (NOT ZLIB_LIBRARIES OR NOT ZLIB_INCLUDE_DIRS)
find_package(ZLIB REQUIRED)
endif()
+find_package(Threads)
+
# shared CXX flags for src & tests
include(CheckCXXCompilerFlag)
set(Bax2Bam_CXX_FLAGS "-g -std=c++11 -Wall")
diff --git a/utils/bax2bam/README.md b/utils/bax2bam/README.md
index 62bdcbd..20832d9 100644
--- a/utils/bax2bam/README.md
+++ b/utils/bax2bam/README.md
@@ -40,7 +40,7 @@ Options:
DeletionTag dt Y
InsertionQV iq Y
IPD ip Y
- PulseWidth pw N
+ PulseWidth pw Y
MergeQV mq Y
SubstitutionQV sq Y
SubstitutionTag st N
@@ -59,4 +59,4 @@ Options:
that non-sequencing ZMWs should be included in the
output scraps BAM file, if applicable.
-```
\ No newline at end of file
+```
diff --git a/utils/bax2bam/makefile b/utils/bax2bam/makefile
index c50e02c..4c395bb 100644
--- a/utils/bax2bam/makefile
+++ b/utils/bax2bam/makefile
@@ -1,13 +1,14 @@
.PHONY=all
SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
--include ${CURDIR}/../../defines.mk
+include ${CURDIR}/../../defines.mk
include ${SRCDIR}/../../rules.mk
all: ${CURDIR}/src/* ${CURDIR}/tests/src/*
@mkdir -p ${CURDIR}/build && \
cd ${CURDIR}/build && \
- cmake -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \
+ cmake -DBOOST_ROOT=${BOOST_ROOT} \
+ -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \
-DHTSLIB_INCLUDE_DIRS=${HTSLIB_INC} \
-DPacBioBAM_LIBRARIES=${PBBAM_LIB}/libpbbam${SH_LIB_EXT} \
-DHTSLIB_LIBRARIES=${HTSLIB_LIB}/libhts${SH_LIB_EXT} \
diff --git a/utils/bax2bam/src/Bax2Bam.cpp b/utils/bax2bam/src/Bax2Bam.cpp
index 70fc196..6f66acb 100644
--- a/utils/bax2bam/src/Bax2Bam.cpp
+++ b/utils/bax2bam/src/Bax2Bam.cpp
@@ -38,17 +38,77 @@ bool WriteDatasetXmlOutput(const Settings& settings,
assert(errors);
try {
+
+ // determine output details based on mode
+ // initialize with SUBREAD data (most common)
+ DataSet::TypeEnum outputDataSetType;
+ string outputDataSetMetaType;
+ string outputTimestampPrefix;
+ string outputBamFileType;
+ string outputScrapsFileType;
+ string outputXmlSuffix;
+
+ switch(settings.mode)
+ {
+ case Settings::SubreadMode :
+ {
+ outputDataSetType = DataSet::SUBREAD;
+ outputDataSetMetaType = "PacBio.DataSet.SubreadSet";
+ outputTimestampPrefix = "pacbio_dataset_subreadset-";
+ outputBamFileType = "PacBio.SubreadFile.SubreadBamFile";
+ outputScrapsFileType = "PacBio.SubreadFile.ScrapsBamFile";
+ outputXmlSuffix = ".subreadset.xml";
+ break;
+ }
+
+ case Settings::CCSMode :
+ {
+ outputDataSetType = DataSet::CONSENSUS_READ;
+ outputDataSetMetaType = "PacBio.DataSet.ConsensusReadSet";
+ outputTimestampPrefix = "pacbio_dataset_consensusreadset-";
+ outputBamFileType = "PacBio.ConsensusReadFile.ConsensusReadBamFile";
+ outputScrapsFileType = "";
+ outputXmlSuffix = ".consensusreadset.xml";
+ break;
+
+ }
+ case Settings::HQRegionMode :
+ {
+ outputDataSetType = DataSet::SUBREAD;
+ outputDataSetMetaType = "PacBio.DataSet.SubreadSet";
+ outputTimestampPrefix = "pacbio_dataset_subreadset-";
+ outputBamFileType = "PacBio.SubreadFile.HqRegionBamFile";
+ outputScrapsFileType = "PacBio.SubreadFile.HqScrapsBamFile";;
+ outputXmlSuffix = ".subreadset.xml";
+ break;
+ }
+ case Settings::PolymeraseMode :
+ {
+ outputDataSetType = DataSet::SUBREAD;
+ outputDataSetMetaType = "PacBio.DataSet.SubreadSet";
+ outputTimestampPrefix = "pacbio_dataset_subreadset-";
+ outputBamFileType = "PacBio.SubreadFile.PolymeraseBamFile";
+ outputScrapsFileType = "PacBio.SubreadFile.PolymeraseScrapsBamFile";
+ outputXmlSuffix = ".subreadset.xml";
+ break;
+ }
+
+ default:
+ assert(false); // should already be checked upstream
+ throw std::runtime_error("unknown mode selected");
+ }
+
DataSet dataset(settings.datasetXmlFilename);
assert(dataset.Type() == DataSet::HDF_SUBREAD);
// change type
- dataset.Type(DataSet::SUBREAD);
- dataset.MetaType("PacBio.DataSet.SubreadSet");
+ dataset.Type(outputDataSetType);
+ dataset.MetaType(outputDataSetMetaType);
time_t currentTime = time(NULL);
//const string& timestamp = CurrentTimestamp();
dataset.CreatedAt(ToIso8601(currentTime));
- dataset.TimeStampedName(string{"pacbio_dataset_subreadset-"}+ToDataSetFormat(currentTime));
+ dataset.TimeStampedName(outputTimestampPrefix+ToDataSetFormat(currentTime));
// change files: remove BAX, add BAM
std::vector<ExternalResource> toRemove;
@@ -86,7 +146,7 @@ bool WriteDatasetXmlOutput(const Settings& settings,
// Combine the scheme and filepath and store in the dataset
mainBamFilepath = scheme + mainBamFilepath;
- ExternalResource mainBam{ "PacBio.SubreadFile.SubreadBamFile", mainBamFilepath };
+ ExternalResource mainBam{ outputBamFileType, mainBamFilepath };
FileIndex mainPbi{ "PacBio.Index.PacBioIndex", mainBamFilepath + ".pbi" };
mainBam.FileIndices().Add(mainPbi);
@@ -108,7 +168,7 @@ bool WriteDatasetXmlOutput(const Settings& settings,
scrapsBamFilepath.append(settings.scrapsBamFilename);
}
- ExternalResource scrapsBam{ "PacBio.SubreadFile.ScrapsBamFile", scrapsBamFilepath };
+ ExternalResource scrapsBam{ outputScrapsFileType, scrapsBamFilepath };
FileIndex scrapsPbi{ "PacBio.Index.PacBioIndex", scrapsBamFilepath + ".pbi" };
scrapsBam.FileIndices().Add(scrapsPbi);
mainBam.ExternalResources().Add(scrapsBam);
@@ -139,7 +199,7 @@ bool WriteDatasetXmlOutput(const Settings& settings,
// save to file
string xmlFn = settings.outputXmlFilename; // try user-provided explicit filename first
if (xmlFn.empty())
- xmlFn = settings.outputBamPrefix + ".dataset.xml"; // prefix set w/ moviename elsewhere if not user-provided
+ xmlFn = settings.outputBamPrefix + outputXmlSuffix; // prefix set w/ moviename elsewhere if not user-provided
dataset.Save(xmlFn);
return true;
diff --git a/utils/bax2bam/src/CMakeLists.txt b/utils/bax2bam/src/CMakeLists.txt
index 9fa7243..aa8b803 100644
--- a/utils/bax2bam/src/CMakeLists.txt
+++ b/utils/bax2bam/src/CMakeLists.txt
@@ -53,5 +53,7 @@ target_link_libraries(bax2bam
${PacBioBAM_LIBRARIES}
${HTSLIB_LIBRARIES}
${ZLIB_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT}
${MY_LIBRT}
+ "-ldl"
)
diff --git a/utils/bax2bam/src/ConverterBase.h b/utils/bax2bam/src/ConverterBase.h
index e2cafd5..19a1d6c 100644
--- a/utils/bax2bam/src/ConverterBase.h
+++ b/utils/bax2bam/src/ConverterBase.h
@@ -818,12 +818,16 @@ bool ConverterBase<RecordType, HdfReader>::LoadChemistryFromMetadataXML(
sequencingKit_ = pt.get<std::string>("Metadata.SequencingKit.PartNumber");
basecallerVersion_ = pt.get<std::string>("Metadata.InstCtrlVer");
- // throws if invalid chemistry triple
- // we'll take the opportunity to exit early with error message
- using PacBio::BAM::ReadGroupInfo;
- auto chemistryCheck = ReadGroupInfo::SequencingChemistryFromTriple(bindingKit_,
- sequencingKit_,
- basecallerVersion_);
+ if (!settings_.isIgnoringChemistryCheck) {
+
+ // throws if invalid chemistry triple
+ // we'll take the opportunity to exit early with error message
+ using PacBio::BAM::ReadGroupInfo;
+ auto chemistryCheck = ReadGroupInfo::SequencingChemistryFromTriple(bindingKit_,
+ sequencingKit_,
+ basecallerVersion_);
+ }
+
return true;
}
catch (PacBio::BAM::InvalidSequencingChemistryException& e) {
diff --git a/utils/bax2bam/src/Settings.cpp b/utils/bax2bam/src/Settings.cpp
index a6daa01..6b827c3 100644
--- a/utils/bax2bam/src/Settings.cpp
+++ b/utils/bax2bam/src/Settings.cpp
@@ -88,17 +88,19 @@ const char* Settings::Option::ccsMode_ = "ccsMode";
const char* Settings::Option::internalMode_ = "internalMode";
const char* Settings::Option::outputXml_ = "outputXml";
const char* Settings::Option::sequelPlatform_ = "sequelPlatform";
+const char* Settings::Option::allowUnsupportedChem_ = "allowUnsupportedChem";
Settings::Settings(void)
: mode(Settings::SubreadMode)
, isInternal(false)
, isSequelInput(false)
+ , isIgnoringChemistryCheck(false)
, usingDeletionQV(true)
, usingDeletionTag(true)
, usingInsertionQV(true)
, usingIPD(true)
, usingMergeQV(true)
- , usingPulseWidth(false)
+ , usingPulseWidth(true)
, usingSubstitutionQV(true)
, usingSubstitutionTag(false)
, losslessFrames(false)
@@ -190,6 +192,10 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser,
settings.isInternal = options.is_set(Settings::Option::internalMode_) ? options.get(Settings::Option::internalMode_)
: false;
+ // strict/relaxed chemistry check
+ settings.isIgnoringChemistryCheck = options.is_set(Settings::Option::allowUnsupportedChem_) ? options.get(Settings::Option::allowUnsupportedChem_)
+ : false;
+
// platform
settings.isSequelInput = options.is_set(Settings::Option::sequelPlatform_) ? options.get(Settings::Option::sequelPlatform_)
: false;
@@ -228,6 +234,10 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser,
}
}
+ // always disable PulseWidth tag in CCS mode
+ if (isCCS)
+ settings.usingPulseWidth = false;
+
#ifdef DEBUG_SETTINGS
string modeString;
diff --git a/utils/bax2bam/src/Settings.h b/utils/bax2bam/src/Settings.h
index c060620..ab44923 100644
--- a/utils/bax2bam/src/Settings.h
+++ b/utils/bax2bam/src/Settings.h
@@ -30,6 +30,7 @@ public:
static const char* internalMode_;
static const char* outputXml_;
static const char* sequelPlatform_;
+ static const char* allowUnsupportedChem_;
};
public:
@@ -56,6 +57,9 @@ public:
// platform
bool isSequelInput;
+ // chemistry checking?
+ bool isIgnoringChemistryCheck;
+
// features
bool usingDeletionQV;
bool usingDeletionTag;
diff --git a/utils/bax2bam/src/main.cpp b/utils/bax2bam/src/main.cpp
index 223004f..8cbb7e8 100644
--- a/utils/bax2bam/src/main.cpp
+++ b/utils/bax2bam/src/main.cpp
@@ -75,10 +75,13 @@ int main(int argc, char* argv[])
" DeletionTag dt Y\n"
" InsertionQV iq Y\n"
" IPD ip Y\n"
- " PulseWidth pw N\n"
+ " PulseWidth pw Y*\n"
" MergeQV mq Y\n"
" SubstitutionQV sq Y\n"
" SubstitutionTag st N\n"
+ "\n"
+ "* - PulseWidth will be ignored in CCS mode.\n"
+ "\n"
"If this option is used, then only those features listed will be included, "
"regardless of the default state."
);
@@ -102,6 +105,16 @@ int main(int argc, char* argv[])
);
parser.add_option_group(bamModeGroup);
+ auto additionalGroup = optparse::OptionGroup(parser, "Additional options");
+ additionalGroup.add_option("--allowUnrecognizedChemistryTriple")
+ .dest(Settings::Option::allowUnsupportedChem_)
+ .action("store_true")
+ .help("By default, bax2bam only allows the conversion of files "
+ "with chemistries that are supported in SMRT Analysis 3. "
+ "Set this flag to disable the strict check and allow "
+ "generation of BAM files containing legacy chemistries.");
+ parser.add_option_group(additionalGroup);
+
// parse command line
Settings settings = Settings::FromCommandLine(parser, argc, argv);
if (!settings.errors.empty()) {
diff --git a/utils/bax2bam/tests/CMakeLists.txt b/utils/bax2bam/tests/CMakeLists.txt
index 8045024..4a3680a 100644
--- a/utils/bax2bam/tests/CMakeLists.txt
+++ b/utils/bax2bam/tests/CMakeLists.txt
@@ -79,6 +79,7 @@ target_link_libraries(test_bax2bam
${ZLIB_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT} # quirky pthreads
${MY_LIBRT}
+ "-ldl"
)
# add_test(test_bax2bam test_bax2bam)
add_test(
diff --git a/utils/bax2bam/tests/bax2bam.t b/utils/bax2bam/tests/bax2bam.t
index 625ee6f..9f13a6e 100644
--- a/utils/bax2bam/tests/bax2bam.t
+++ b/utils/bax2bam/tests/bax2bam.t
@@ -4,4 +4,4 @@ Simple test to make sure bax2bam runs properly.
$ cd $WORKSPACE
$ cd smrtanalysis/_output/modulebuilds/bioinformatics/staging/PostPrimary/bax2bam/_output/install/binwrap-build
$ rm -f tst1.*.bam
- $ ./bax2bam -o tst1 /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5
+ $ ./bax2bam -o tst1 /pbi/dept/secondary/siv/testdata/bax2bam/m160823_221224_ethan_c010091942559900001800000112311890_s1_p0.1.bax.h5
diff --git a/utils/bax2bam/tests/src/TestData.h.in b/utils/bax2bam/tests/src/TestData.h.in
index 11fa1f8..53f0a89 100644
--- a/utils/bax2bam/tests/src/TestData.h.in
+++ b/utils/bax2bam/tests/src/TestData.h.in
@@ -10,7 +10,7 @@ namespace tests {
const std::string Bax2Bam_Exe = std::string("@Bax2Bam_BinDir@/bax2bam");
const std::string Source_Dir = std::string("@Bax2Bam_TestsDir@");
const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
-const std::string Data_Dir = std::string("/pbi/dept/secondary/siv/testdata/bax2bam/data");
+const std::string Data_Dir = std::string("/pbi/dept/secondary/siv/testdata/bax2bam");
} // namespace tests
diff --git a/utils/bax2bam/tests/src/test_ccs.cpp b/utils/bax2bam/tests/src/test_ccs.cpp
index f388bca..4851be0 100644
--- a/utils/bax2bam/tests/src/test_ccs.cpp
+++ b/utils/bax2bam/tests/src/test_ccs.cpp
@@ -26,7 +26,7 @@ TEST(CcsTest, EndToEnd_Multiple)
const string movieName = "m131018_081703_42161_c100585152550000001823088404281404_s1_p0";
vector<string> baxFilenames;
- baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.ccs.h5");
+ baxFilenames.push_back(tests::Data_Dir + "/data/" + movieName + ".1.ccs.h5");
const string generatedBam = movieName + ".ccs.bam";
diff --git a/utils/bax2bam/tests/src/test_hqregions.cpp b/utils/bax2bam/tests/src/test_hqregions.cpp
index c443850..fba41f1 100644
--- a/utils/bax2bam/tests/src/test_hqregions.cpp
+++ b/utils/bax2bam/tests/src/test_hqregions.cpp
@@ -25,13 +25,13 @@ TEST(HqRegionsTest, EndToEnd_Single)
const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
vector<string> baxFilenames;
- baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5");
+ baxFilenames.push_back(tests::Data_Dir + "/data/" + movieName + ".1.bax.h5");
const string generatedBam = movieName + ".hqregions.bam";
const string scrapBam = movieName + ".lqregions.bam";
- // run conversion
- const int result = RunBax2Bam(baxFilenames, "--hqregion");
+ // run conversion (manually disabling PW check... we can restore this later with a BAX that has both HQRegions & PW data)
+ const int result = RunBax2Bam(baxFilenames, "--hqregion --pulsefeatures=\"DeletionQV,DeletionTag,InsertionQV,IPD,MergeQV,SubstitutionQV\"");
EXPECT_EQ(0, result);
{ // ensure PBIs exist
@@ -51,7 +51,7 @@ TEST(HqRegionsTest, EndToEnd_Single)
baxReader.IncludeField("MergeQV");
baxReader.IncludeField("SubstitutionQV");
baxReader.IncludeField("HQRegionSNR");
- // not using SubTag or PulseWidth here
+ // not using SubTag or PulseWidth
string baxBasecallerVersion;
string baxBindingKit;
@@ -136,7 +136,7 @@ TEST(HqRegionsTest, EndToEnd_Single)
EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
EXPECT_EQ(baxBindingKit, rg.BindingKit());
EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
- EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+ EXPECT_FLOAT_EQ(75.0, std::stof(rg.FrameRateHz()));
EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
@@ -293,7 +293,7 @@ TEST(HqRegionsTest, EndToEnd_Single)
EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
EXPECT_EQ(baxBindingKit, rg.BindingKit());
EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
- EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+ EXPECT_FLOAT_EQ(75.0, std::stof(rg.FrameRateHz()));
EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
@@ -419,3 +419,4 @@ TEST(HqRegionsTest, EndToEnd_Single)
RemoveFile(scrapBam + ".pbi");
});
}
+
diff --git a/utils/bax2bam/tests/src/test_polymerase.cpp b/utils/bax2bam/tests/src/test_polymerase.cpp
index 611962a..73c50a6 100644
--- a/utils/bax2bam/tests/src/test_polymerase.cpp
+++ b/utils/bax2bam/tests/src/test_polymerase.cpp
@@ -20,7 +20,7 @@ using namespace PacBio::BAM;
TEST(PolymeraseTest, EndToEnd_Single)
{
// setup
- const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
+ const string movieName = "m160823_221224_ethan_c010091942559900001800000112311890_s1_p0";
vector<string> baxFilenames;
baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5");
@@ -46,7 +46,8 @@ TEST(PolymeraseTest, EndToEnd_Single)
baxReader.IncludeField("MergeQV");
baxReader.IncludeField("SubstitutionQV");
baxReader.IncludeField("HQRegionSNR");
- // not using SubTag or PulseWidth here
+ baxReader.IncludeField("WidthInFrames");
+ // not using SubTag
string baxBasecallerVersion;
string baxBindingKit;
@@ -118,19 +119,20 @@ TEST(PolymeraseTest, EndToEnd_Single)
EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
EXPECT_EQ(baxBindingKit, rg.BindingKit());
EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
- EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+ EXPECT_FLOAT_EQ(75.00577, std::stof(rg.FrameRateHz()));
EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD));
EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV));
EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+ EXPECT_EQ("pw", rg.BaseFeatureTag(BaseFeature::PULSE_WIDTH));
EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
// compare 1st record from each file
SMRTSequence baxRecord;
- EXPECT_TRUE(baxReader.GetNext(baxRecord) > 0);
+ EXPECT_TRUE(baxReader.GetReadAt(8, baxRecord) > 0);
vector<float> hqSnr;
hqSnr.push_back(baxRecord.HQRegionSnr('A'));
@@ -138,10 +140,10 @@ TEST(PolymeraseTest, EndToEnd_Single)
hqSnr.push_back(baxRecord.HQRegionSnr('G'));
hqSnr.push_back(baxRecord.HQRegionSnr('T'));
- EXPECT_GT(hqSnr[0], 0);
- EXPECT_GT(hqSnr[1], 0);
- EXPECT_GT(hqSnr[2], 0);
- EXPECT_GT(hqSnr[3], 0);
+ EXPECT_FLOAT_EQ(0.0, hqSnr[0]);
+ EXPECT_FLOAT_EQ(0.0, hqSnr[1]);
+ EXPECT_FLOAT_EQ(0.0, hqSnr[2]);
+ EXPECT_FLOAT_EQ(0.0, hqSnr[3]);
bool firstRecord = true;
EntireFileQuery entireFile(bamFile);
diff --git a/utils/bax2bam/tests/src/test_subreads.cpp b/utils/bax2bam/tests/src/test_subreads.cpp
index 681a17a..6394f4a 100644
--- a/utils/bax2bam/tests/src/test_subreads.cpp
+++ b/utils/bax2bam/tests/src/test_subreads.cpp
@@ -121,7 +121,7 @@ void ComputeSubreadIntervals(vector<SubreadInterval>* const intervals,
TEST(SubreadsTest, EndToEnd_Multiple)
{
// setup
- const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
+ const string movieName = "m160823_221224_ethan_c010091942559900001800000112311890_s1_p0";
vector<string> baxFilenames;
baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5");
@@ -150,7 +150,8 @@ TEST(SubreadsTest, EndToEnd_Multiple)
baxReader.IncludeField("MergeQV");
baxReader.IncludeField("SubstitutionQV");
baxReader.IncludeField("HQRegionSNR");
- // not using SubTag or PulseWidth here
+ baxReader.IncludeField("WidthInFrames");
+ // not using SubTag here
string baxBasecallerVersion;
string baxBindingKit;
@@ -230,13 +231,14 @@ TEST(SubreadsTest, EndToEnd_Multiple)
EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
EXPECT_EQ(baxBindingKit, rg.BindingKit());
EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
- EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+ EXPECT_FLOAT_EQ(75.00577, std::stof(rg.FrameRateHz()));
EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD));
EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV));
EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+ EXPECT_EQ("pw", rg.BaseFeatureTag(BaseFeature::PULSE_WIDTH));
EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
@@ -251,6 +253,10 @@ TEST(SubreadsTest, EndToEnd_Multiple)
size_t numTested = 0;
EntireFileQuery entireFile(bamFile);
for (BamRecord& bamRecord : entireFile) {
+
+ if (numTested > 30)
+ goto cleanup;
+
if (intervalIdx >= subreadIntervals.size())
{
while (baxReader.GetNext(baxRecord))
diff --git a/utils/ctest/samFilter.t b/utils/ctest/samFilter.t
index 9dfed3b..9bbabe0 100644
--- a/utils/ctest/samFilter.t
+++ b/utils/ctest/samFilter.t
@@ -11,15 +11,15 @@ Set up the executable: samFilter.
$ TMP2=$OUTDIR/$$.tmp.stdout
$ rm -f $OUTFILE
- $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -minAccuracy 70 -minPctSimilarity 30 -hitPolicy all
+ $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --minAccuracy 70 --minPctSimilarity 30 --hitPolicy all
$ tail -n+7 $OUTFILE |sort > $TMP1
$ tail -n+7 $STDFILE |sort > $TMP2
$ diff $TMP1 $TMP2
$ rm $TMP1 $TMP2
-#Test whether minAccuracy and minPctSimilarity can be float.
+#Test whether --minAccuracy and --minPctSimilarity can be float.
# $ rm -f $OUTFILE
-# $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -minAccuracy 70.0 -minPctSimilarity 30.0 -hitPolicy all
+# $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --minAccuracy 70.0 --minPctSimilarity 30.0 --hitPolicy all
# $ tail -n+7 $OUTFILE | sort > $TMP1
# $ tail -n+7 $STDFILE | sort > $TMP2
# $ diff $TMP1 $TMP2
@@ -30,40 +30,40 @@ Set up the executable: samFilter.
$ STDFILE=$STDDIR/lambda_bax_filter_2.sam
$ rm -f $OUTFILE
- $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -hitPolicy allbest
+ $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --hitPolicy allbest
$ tail -n+7 $OUTFILE > $TMP1
$ tail -n+7 $STDFILE > $TMP2
$ diff $TMP1 $TMP2
$ rm $TMP1 $TMP2
-#Test samFilter with -hitPolicy random
+#Test samFilter with --hitPolicy random
$ OUTFILE=$OUTDIR/lambda_bax_filter_3.sam
$ STDFILE=$STDDIR/lambda_bax_filter_3.sam
$ rm -f $OUTFILE
- $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -hitPolicy random
+ $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --hitPolicy random
$ tail -n+7 $OUTFILE > $TMP1
$ tail -n+7 $STDFILE > $TMP2
$ diff $TMP1 $TMP2
$ rm $TMP1 $TMP2
-#Test samFilter with -hitPolicy randombest
+#Test samFilter with --hitPolicy randombest
$ OUTFILE=$OUTDIR/lambda_bax_filter_4.sam
$ STDFILE=$STDDIR/lambda_bax_filter_4.sam
$ rm -f $OUTFILE
- $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -hitPolicy randombest
+ $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --hitPolicy randombest
$ tail -n+7 $OUTFILE > $TMP1
$ tail -n+7 $STDFILE > $TMP2
$ diff $TMP1 $TMP2
$ rm $TMP1 $TMP2
-# Test samFilter with -hitPolicy leftmost
+# Test samFilter with --hitPolicy leftmost
$ OUTFILE=$OUTDIR/test_leftmost_out.sam
$ rm -f $OUTFILE
- $ $EXEC $DATDIR/test_leftmost.sam $DATDIR/test_leftmost_target.fasta $OUTFILE -hitPolicy leftmost
+ $ $EXEC $DATDIR/test_leftmost.sam $DATDIR/test_leftmost_target.fasta $OUTFILE --hitPolicy leftmost
$ tail -n+6 $OUTFILE |cut -f 4
1
@@ -77,12 +77,12 @@ Set up the executable: samFilter.
$ diff $TMP1 $TMP2
$ rm $TMP1 $TMP2
-#Test samFilter with -holeNumbers
+#Test samFilter with --holeNumbers
$ OUTFILE=$OUTDIR/lambda_bax_filter_6.sam
$ STDFILE=$STDDIR/lambda_bax_filter_6.sam
$ rm -f $OUTFILE
- $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -holeNumbers 101350-105000,21494
+ $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --holeNumbers 101350-105000,21494
$ tail -n+7 $OUTFILE > $TMP1
$ tail -n+7 $STDFILE > $TMP2
$ diff $TMP1 $TMP2
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/blasr.git
More information about the debian-med-commit
mailing list