[med-svn] [blasr] 01/04: Imported Upstream version 5.3+0
Afif Elghraoui
afif at moszumanska.debian.org
Sat Dec 24 01:53:54 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository blasr.
commit afe0a9b297f64db82f4824b91794f0b538bd763b
Author: Afif Elghraoui <afif at debian.org>
Date: Fri Dec 23 16:51:42 2016 -0800
Imported Upstream version 5.3+0
---
cram.mk | 21 +++++++----
ctest/aggressiveIntervalCut.t | 4 +--
ctest/concordant.t | 39 ++++++++++-----------
ctest/ecoli.t | 9 +++--
ctest/fastMaxInterval.t | 4 +--
ctest/multipart.t | 4 +--
ctest/noSplitSubreads.t | 34 ++++++++++++++++++
ctest/useccsallBestN1.t | 2 +-
iblasr/MappingParameters.h | 26 +++++++++++++-
iblasr/RegisterBlasrOptions.h | 11 +++---
makefile | 4 +++
utils/bam2bax/BUILD.txt | 54 ++++++++++++++++++++---------
utils/bam2bax/CMakeLists.txt | 10 ++++--
utils/bam2bax/makefile | 22 ++++++++++++
utils/bam2bax/src/CMakeLists.txt | 2 ++
utils/bam2bax/tests/CMakeLists.txt | 1 +
utils/bax2bam/CMakeLists.txt | 9 ++++-
utils/bax2bam/makefile | 4 +++
utils/bax2bam/src/CMakeLists.txt | 1 +
utils/bax2bam/src/IConverter.cpp | 7 ----
utils/bax2bam/src/main.cpp | 2 +-
utils/bax2bam/tests/CMakeLists.txt | 1 +
utils/bax2bam/tests/src/TestData.h.in | 2 ++
utils/bax2bam/tests/src/test_ccs.cpp | 4 +--
utils/bax2bam/tests/src/test_hqregions.cpp | 8 ++---
utils/bax2bam/tests/src/test_polymerase.cpp | 4 +--
utils/bax2bam/tests/src/test_subreads.cpp | 4 +--
27 files changed, 210 insertions(+), 83 deletions(-)
diff --git a/cram.mk b/cram.mk
index 35f62e8..f6e77d5 100644
--- a/cram.mk
+++ b/cram.mk
@@ -1,18 +1,27 @@
FAST_CTESTS := \
+ctest/ecoli.t \
+ctest/fastMaxInterval.t \
+ctest/aggressiveIntervalCut.t \
+ctest/multipart.t \
ctest/affineAlign.t ctest/bamOut.t ctest/ccsH5.t ctest/filtercriteria.t ctest/m0-5.t \
-ctest/aggressiveIntervalCut.t ctest/fofn.t ctest/multipart.t \
+ctest/fofn.t \
ctest/alignScore.t ctest/hitpolicy.t ctest/noSplitSubreads.t \
-ctest/bamIn.t ctest/fastMaxInterval.t ctest/open_fail.t ctest/verbose.t ctest/deterministic.t
+ctest/bamIn.t ctest/open_fail.t ctest/verbose.t ctest/deterministic.t
MILD_CTESTS := \
- ctest/bug25766.t ctest/holeNumbers.t
+ ctest/concordant.t ctest/bug25766.t ctest/holeNumbers.t
-SLOW_CTESTS := ctest/bug25328.t ctest/useccsallLargeGenome.t
+SLOW_CTESTS := ctest/bug25328.t
# XXX: following tests sidelined, needs bam input after --sam option removed
-# FAST: ctest/ecoli.t
-# MILD: ctest/useccsallBestN1.t ctest/concordant.t
+# MILD: ctest/useccsallBestN1.t
+
+
+# sidelined because of changes in directories
+#
+# needed to restore /mnt/data3/vol53/2450530/0014
+# SLOW ctest/useccsallLargeGenome.t
#BLASR_PATH=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/private/otherbins/internalall/bin/
#export BLASR_PATH
diff --git a/ctest/aggressiveIntervalCut.t b/ctest/aggressiveIntervalCut.t
index 8743782..7cc0cc7 100644
--- a/ctest/aggressiveIntervalCut.t
+++ b/ctest/aggressiveIntervalCut.t
@@ -3,8 +3,8 @@ Set up
Test --aggressiveIntervalCut.
$ rm -f $TMP1
- $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
- $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
+ $ BASFILE=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/aggressiveIntervalCut/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
+ $ REFFA=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/references/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
$ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --aggressiveIntervalCut
[INFO] * [blasr] started. (glob)
[INFO] * [blasr] ended. (glob)
diff --git a/ctest/concordant.t b/ctest/concordant.t
index e0b0225..253d9bf 100644
--- a/ctest/concordant.t
+++ b/ctest/concordant.t
@@ -2,29 +2,13 @@ Set up
$ . $TESTDIR/setup.sh
Test --concordant
- $ rm -rf $OUTDIR/concordant_subset.bam
- $ rm -rf $OUTDIR/concordant_subset.sam
- $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta --concordant --refineConcordantAlignments --bam --out $OUTDIR/concordant_subset.bam --nproc 12 --holeNumbers 1--10000 --sa $DATDIR/ecoli_reference.sa
+ $ rm -rf $OUTDIR/concordant_subset.sam $OUTDIR/tmp1 $OUTDIR/tmp2
+ $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta --concordant --refineConcordantAlignments -m 4 --out $OUTDIR/concordant_subset.m4 --nproc 12 --holeNumbers 1--10000 --sa $DATDIR/ecoli_reference.sa
[INFO]* (glob)
[INFO]* (glob)
- $ $SAMTOOLS view $OUTDIR/concordant_subset.bam > $OUTDIR/concordant_subset.sam
- $ sed -n 6,110864p $OUTDIR/concordant_subset.sam > $OUTDIR/tmp1
- $ sort $OUTDIR/tmp1 > $OUTDIR/tmp11
- $ sed -n 6,110864p $STDDIR/$UPDATEDATE/concordant_subset.sam > $OUTDIR/tmp2
- $ sort $OUTDIR/tmp2 > $OUTDIR/tmp22
- $ diff $OUTDIR/tmp11 $OUTDIR/tmp22
- $ rm -rf $OUTDIR/tmp1 $OUTDIR/tmp2 $OUTDIR/tmp11 $OUTDIR/tmp22
-#2014_05_28 --> changelist 135254, use MAX_BAND_SIZE to contrain GuidedAlign
-#2014_08_21 --> changelist 138516, added YS, YE, ZM tags.
-#2014_08_28 --> changelist 139176, update SAM MD5
-#2014_09_12 --> changelist 140410, changed the default value of '--concordantTemplate' from 'longestsubread' to 'typicalsubread'
-#2014_09_17 --> changelist 140573, changed SDPFragment LessThan to make sure blasr compiled with gcc 4.4 and 4.8 can produce identical results.
-#2014_10_16 --> changelist 141378, changed the default value of '--concordantTemplate' from 'typicalsubread' to 'mediansubread'
-#2015_03_01 --> changelist 146599, reads from the same movie should have unique readGroupId
-#2015_03_28 --> changelist 148101, 148080 updated read group id, 148100 updated TLEN
-#2015_04_09 --> changelist 148796, updated read group id
-#2015_04_25 --> changelist 149721, update CIGAR string, replace M with X=.
-#2015_04_25 --> changelist ?, force refine all concordant alignments
+ $ sort $OUTDIR/concordant_subset.m4 > $OUTDIR/tmp1
+Updated in 2016_10_05 --> changed output format from sam to m4, isolate concordant tests from file format tests
+ $ diff $OUTDIR/tmp1 $STDDIR/2016_10_05/concordant_subset.m4
Test --concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping)
$ FOFN=$DATDIR/FMR1_concordant.fofn
@@ -33,3 +17,16 @@ Test --concordant FMR1 case (the 'typical subread' is selected as template for c
[INFO]* (glob)
[INFO]* (glob)
$ diff $OUTDIR/FMR1_zmw_37927.m4 $STDDIR/$UPDATEDATE/FMR1_zmw_37927.m4
+
+#History
+#2014_05_28 --> changelist 135254, use MAX_BAND_SIZE to contrain GuidedAlign
+#2014_08_21 --> changelist 138516, added YS, YE, ZM tags.
+#2014_08_28 --> changelist 139176, update SAM MD5
+#2014_09_12 --> changelist 140410, changed the default value of '--concordantTemplate' from 'longestsubread' to 'typicalsubread'
+#2014_09_17 --> changelist 140573, changed SDPFragment LessThan to make sure blasr compiled with gcc 4.4 and 4.8 can produce identical results.
+#2014_10_16 --> changelist 141378, changed the default value of '--concordantTemplate' from 'typicalsubread' to 'mediansubread'
+#2015_03_01 --> changelist 146599, reads from the same movie should have unique readGroupId
+#2015_03_28 --> changelist 148101, 148080 updated read group id, 148100 updated TLEN
+#2015_04_09 --> changelist 148796, updated read group id
+#2015_04_25 --> changelist 149721, update CIGAR string, replace M with X=.
+#2015_11_09 --> changelist 167117, added -refineConcordantAlignments
diff --git a/ctest/ecoli.t b/ctest/ecoli.t
index 07940fc..c682178 100644
--- a/ctest/ecoli.t
+++ b/ctest/ecoli.t
@@ -13,10 +13,9 @@ Test blasr with --bam
[INFO]* (glob)
[INFO]* (glob)
- $ $SAMTOOLS view $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam
- $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1--11 > $TMP1
- $ sed -n '5,$ p' $STDDIR/$UPDATEDATE/ecoli_subset.sam | sort | cut -f 1--11 > $TMP2
- $ diff $TMP1 $TMP2
- $ rm $TMP1 $TMP2
+ $ $SAMTOOLS view -h $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam
+ $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1-11 > $OUTDIR/ecoli_subset_out
+ $ sed -n '5,$ p' $STDDIR/2016_10_20/ecoli_subset.sam | sort | cut -f 1-11 > $OUTDIR/ecoli_subset_std
+ $ diff $OUTDIR/ecoli_subset_out $OUTDIR/ecoli_subset_std
# 2015_03_08 --> changelist 148101, 148080 updated read group id; 148100 updated TLEN
# 2015_04_09 --> changelist 148796, updated read group id
diff --git a/ctest/fastMaxInterval.t b/ctest/fastMaxInterval.t
index 67a687b..1d8816b 100644
--- a/ctest/fastMaxInterval.t
+++ b/ctest/fastMaxInterval.t
@@ -3,8 +3,8 @@ Set up
Test --fastMaxInterval.
$ rm -f $TMP1
- $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
- $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
+ $ BASFILE=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/aggressiveIntervalCut/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
+ $ REFFA=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/references/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
$ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --fastMaxInterval
[INFO] * [blasr] started. (glob)
[INFO] * [blasr] ended. (glob)
diff --git a/ctest/multipart.t b/ctest/multipart.t
index 5e824d7..ed0e031 100644
--- a/ctest/multipart.t
+++ b/ctest/multipart.t
@@ -4,8 +4,8 @@ Set up
Test input.fofn containing a new bas.h5 file. Note that the new bas.h5 file does not
contain any /PulseData, instead contains /MultiPart/Parts.
$ rm -f $TMP1
- $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
- $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
+ $ BASFILE=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/aggressiveIntervalCut/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
+ $ REFFA=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/references/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
$ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1
[INFO] * [blasr] started. (glob)
[INFO] * [blasr] ended. (glob)
diff --git a/ctest/noSplitSubreads.t b/ctest/noSplitSubreads.t
index ed4a192..5880288 100644
--- a/ctest/noSplitSubreads.t
+++ b/ctest/noSplitSubreads.t
@@ -16,3 +16,37 @@ Test blasr with --noSplitSubreads
[INFO]* (glob)
$ sort $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 > $OUTDIR/lambda_bax_noSplitSubreads_subset.m4
$ diff $OUTDIR/lambda_bax_noSplitSubreads_subset.m4 $STDDIR/lambda_bax_noSplitSubreads_subset.m4
+
+# Test key command of unrolled resequencing, check bam header and alignments in output
+ $ outbam=$OUTDIR/unrolled-4mer.bam
+ $ outsam=$OUTDIR/unrolled-4mer.sam
+ $ query=$DATDIR/unrolled/m54006_151021_185942.subreadset.xml
+ $ ref=$DATDIR/unrolled/All4mer_V2_11_V2_13_V2_15_V2_44_circular_72x_l50256.fasta
+ $ stdsam=$STDDIR/unrolled-4mer.sam
+ $ rm -rf $outbam $outsam
+ $ $EXEC $query $ref --out $outbam --noSplitSubreads --fastMaxInterval --bam
+ [INFO]* (glob)
+ [INFO]* (glob)
+ $ $SAMTOOLS view -h $outbam -o $outsam
+ $ grep -v '^@PG' $outsam > $TMP1 && grep -v '^@PG' $stdsam > $TMP2 && diff $TMP1 $TMP2
+ $ grep '@RG' $outsam
+ @RG\tID:e6043908* (glob)
+ $ grep 'RG:Z:e6043908' $outsam |wc -l
+ 4
+
+
+ $ query=$DATDIR/unrolled/m54006_151021_185942.subreads.bam
+ $ outbam=$OUTDIR/unrolled-4mer-bam-in.bam
+ $ outsam=$OUTDIR/unrolled-4mer-bam-in.sam
+ $ rm -rf $outbam $outsam
+ $ $EXEC $query $ref --out $outbam --noSplitSubreads --fastMaxInterval --bam
+ [INFO]* (glob)
+ [INFO]* (glob)
+ $ $SAMTOOLS view -h $outbam -o $outsam
+ $ grep -v '^@PG' $outsam > $TMP1 && grep -v '^@PG' $stdsam > $TMP2 && diff $TMP1 $TMP2
+ $ grep '@RG' $outsam
+ @RG\tID:e6043908* (glob)
+ $ grep 'RG:Z:e6043908' $outsam |wc -l
+ 4
+
+
diff --git a/ctest/useccsallBestN1.t b/ctest/useccsallBestN1.t
index 659b3b8..950ba0b 100644
--- a/ctest/useccsallBestN1.t
+++ b/ctest/useccsallBestN1.t
@@ -5,7 +5,7 @@ Test --useccsall with bestn = 1
$ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta --bestn 1 --useccsall --bam --out $OUTDIR/useccsall.bam --holeNumbers 76772
[INFO]* (glob)
[INFO]* (glob)
- $ $SAMTOOLS view $OUTDIR/useccsall.bam > $OUTDIR/useccsall.sam
+ $ $SAMTOOLS view -h $OUTDIR/useccsall.bam > $OUTDIR/useccsall.sam
$ sed -n '9,$ p' $OUTDIR/useccsall.sam |cut -f 1-4 > $TMP1
$ sed -n '9,$ p' $STDDIR/$UPDATEDATE/useccsall.sam | cut -f 1-4 > $TMP2
$ diff $TMP1 $TMP2
diff --git a/iblasr/MappingParameters.h b/iblasr/MappingParameters.h
index 271fce4..df656a9 100644
--- a/iblasr/MappingParameters.h
+++ b/iblasr/MappingParameters.h
@@ -142,7 +142,9 @@ public:
bool refineConcordantAlignments;
int flankSize;
bool useRegionTable;
+ bool setIgnoreRegions;
bool useHQRegionTable;
+ bool setIgnoreHQRegions;
bool printUnaligned;
bool noPrintUnalignedSeqs; // print unaligned reads names only.
string unalignedFileName;
@@ -311,7 +313,9 @@ public:
refineConcordantAlignments=false;
flankSize=40;
useRegionTable = true;
+ setIgnoreRegions = false;
useHQRegionTable=true;
+ setIgnoreHQRegions = false;
printUnaligned = false;
unalignedFileName = "";
noPrintUnalignedSeqs = false;
@@ -454,6 +458,9 @@ public:
//
// Fix all logical incompatibilities with parameters.
//
+ if (setIgnoreRegions) { useRegionTable = false; }
+ if (setIgnoreHQRegions) { useHQRegionTable = false; }
+
if (nowarp) {
warp = false;
}
@@ -534,6 +541,14 @@ public:
useRegionTable = true;
readSeparateRegionTable = true;
}
+
+ bool isHDFFile = (queryFileType == FileType::HDFPulse or
+ queryFileType == FileType::HDFBase or
+ queryFileType == FileType::HDFCCSONLY);
+ if ((setIgnoreRegions or setIgnoreHQRegions) and not isHDFFile) {
+ cout << "ERROR: query must be HDF files in order to set ignoreRegions or ignoreHQRegions." << std::endl;
+ exit(1);
+ }
if (ccsFofnFileName != "") {
readSeparateCcsFofn = true;
}
@@ -699,6 +714,11 @@ public:
return ReadType::CCS;
}
if (queryFileType == FileType::PBBAM) {
+ if (not mapSubreadsSeparately) {
+ // special case: blasr subread.bam ref.fa --noSplitSubreads
+ // input type seems like subread while in fact is polymerase
+ return ReadType::POLYMERASE;
+ }
// Read type in BAM may be CCS, SUBREAD, HQREGION or POLYMERASE.
// Determine it later.
return ReadType::UNKNOWN;
@@ -706,7 +726,11 @@ public:
if (mapSubreadsSeparately) {
return ReadType::SUBREAD;
} else {
- if (useHQRegionTable) {
+ if (useHQRegionTable and
+ (queryFileType == FileType::HDFCCSONLY or
+ queryFileType == FileType::HDFBase or
+ queryFileType == FileType::HDFPulse)) {
+ // Only HDF files can contain region table.
return ReadType::HQREGION;
} else {
return ReadType::POLYMERASE;
diff --git a/iblasr/RegisterBlasrOptions.h b/iblasr/RegisterBlasrOptions.h
index af7770a..f67e788 100644
--- a/iblasr/RegisterBlasrOptions.h
+++ b/iblasr/RegisterBlasrOptions.h
@@ -83,8 +83,8 @@ void RegisterBlasrOptions(CommandLineParser & clp, MappingParameters & params) {
clp.RegisterIntOption("-maxScore", &params.maxScore, "", CommandLineParser::Integer);
clp.RegisterStringOption("-bwt", &params.bwtFileName, "");
clp.RegisterIntOption("m", &params.printFormat, "", CommandLineParser::NonNegativeInteger);
- clp.RegisterFlagOption("-sam", &params.printSAM, "");
#ifdef USE_PBBAM
+ clp.RegisterFlagOption("-sam", &params.printSAM, "");
clp.RegisterFlagOption("-bam", &params.printBAM, "");
#endif
clp.RegisterStringOption("-clipping", &params.clippingString, "");
@@ -135,8 +135,9 @@ void RegisterBlasrOptions(CommandLineParser & clp, MappingParameters & params) {
clp.RegisterIntOption("-flankSize", &params.flankSize, "", CommandLineParser::NonNegativeInteger);
clp.RegisterStringOption("-titleTable", &params.titleTableName, "");
clp.RegisterFlagOption("-useSensitiveSearch", &params.doSensitiveSearch, "");
- clp.RegisterFlagOption("-ignoreRegions", &params.useRegionTable, "");
- clp.RegisterFlagOption("-ignoreHQRegions", &params.useHQRegionTable, "");
+ // ignoreRegions or ignoreHQRegions implies region table must exist (i.e., query is HDF).
+ clp.RegisterFlagOption("-ignoreRegions", &params.setIgnoreRegions, "");
+ clp.RegisterFlagOption("-ignoreHQRegions", &params.setIgnoreHQRegions, "");
clp.RegisterFlagOption("-computeAlignProbability", &params.computeAlignProbability, "");
clp.RegisterStringOption("-unaligned", &params.unalignedFileName, "");
// Print unaligned reads names only
@@ -289,8 +290,8 @@ const string BlasrHelp(MappingParameters & params) {
<< " Use no/hard/subread/soft clipping, ONLY for SAM/BAM output."<< endl
<< " --printSAMQV (false)" << endl
<< " Print quality values to SAM output." << endl
- << " --cigarUseSeqMatch (false)" << endl
- << " CIGAR strings in SAM/BAM output use '=' and 'X' to represent sequence match and mismatch instead of 'M'." << endl << endl
+// << " --cigarUseSeqMatch (false)" << endl
+// << " CIGAR strings in SAM/BAM output use '=' and 'X' to represent sequence match and mismatch instead of 'M'." << endl << endl
<< " Options for anchoring alignment regions. This will have the greatest effect on speed and sensitivity." << endl
<< " --minMatch m (12) " << endl
<< " Minimum seed length. Higher minMatch will speed up alignment, " << endl
diff --git a/makefile b/makefile
index e2d53fc..01b2072 100644
--- a/makefile
+++ b/makefile
@@ -76,6 +76,10 @@ cramfast: blasr utils
${MAKE} -f cram.mk cramfast
${MAKE} -C utils cramfast
+crammild: blasr utils
+ ${MAKE} -f cram.mk crammild
+ ${MAKE} -C utils crammild
+
gtest: blasr
# This requires the submodule to be configured with gtest.
${MAKE} -C libcpp gtest
diff --git a/utils/bam2bax/BUILD.txt b/utils/bam2bax/BUILD.txt
index 09539dc..bdb9476 100644
--- a/utils/bam2bax/BUILD.txt
+++ b/utils/bam2bax/BUILD.txt
@@ -9,6 +9,11 @@ Assuming that blasr and blaser_libcpp is placed under //depot/software/smrtanaly
Build instructions for users:
+
+ If pbbam and htslib are prebuilt and included in blasr/defines.mk,
+ set PacBioBAM_INCLUDE_DIRS, HTSLIB_INCLUDE_DIRS, PacBioBAM_LIBRARIES
+ and HTSLIB_LIBRARIES as below. Otherwise, set PacBioBAM_RootDir instead.
+
$ cd <bam2bax>
$ mkdir build; cd build;
$ cmake -DPacBioBAM_INCLUDE_DIRS=<path_to_include_dir> \
@@ -26,23 +31,38 @@ Build instructions for users:
-DHDF5_LIBRARIES=<path_to_lib_so_or_a> \
-DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
../
-
$ make
$ ../tests/bin/test_bam2bax # to test bam2bax exe
-# e.g.,
-#cmake -DPacBioBAM_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/pbbam/include \
-# -DHTSLIB_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/htslib \
-# -DPacBioBAM_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/pbbam/lib/libpbbam.a \
-# -DHTSLIB_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/htslib/libhts.a \
-# -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
-# -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
-# -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
-# -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
-# -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
-# -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
-# -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
-# -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
-# -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
-# -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
-# ../
+
+ An example:
+ $ cmake -DPacBioBAM_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/pbbam/include \
+ -DHTSLIB_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/htslib \
+ -DPacBioBAM_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/pbbam/lib/libpbbam.a \
+ -DHTSLIB_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/htslib/libhts.a \
+ -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
+ -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
+ -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
+ -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
+ -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
+ -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
+ -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
+ -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
+ -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
+ -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+ ../
+
+ Alternatively:
+ $ cmake \
+ -DPacBioBAM_RootDir=$smrtanalysis/bioinformatics/lib/cpp/pbbam \
+ -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
+ -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
+ -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
+ -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
+ -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
+ -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
+ -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
+ -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
+ -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
+ -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+ ../
diff --git a/utils/bam2bax/CMakeLists.txt b/utils/bam2bax/CMakeLists.txt
index 09e5990..3baa14d 100644
--- a/utils/bam2bax/CMakeLists.txt
+++ b/utils/bam2bax/CMakeLists.txt
@@ -66,8 +66,15 @@ endif()
if (NOT PacBioBAM_INCLUDE_DIRS OR NOT PacBioBAM_LIBRARIES
OR NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
- set(PacBioBAM_RootDir ${Bam2Bax_RootDir}/../../../../../lib/cpp/pbbam)
+ set(PacBioBAM_LIBRARIES )
+ set(PacBioBAM_INCLUDE_DIRS )
+ set(HTSLIB_INCLUDE_DIRS )
+ set(HTSLIB_LIBRARIES )
+ if (NOT PacBioBAM_RootDir)
+ message ("Must either set (PacBioBAM_INCLUDE_DIRS, PacBioBAM_LIBRARIES, HTSLIB_INCLUDE_DIRS, and HTSLIB_LIBRARIES) or PacBioBAM_RootDir!")
+ endif()
add_subdirectory(${PacBioBAM_RootDir} external/build/pbbam)
+ set(PBBAM_LINK_FLAG pbbam)
endif()
if (NOT Boost_INCLUDE_DIRS)
@@ -130,4 +137,3 @@ if(Bam2Bax_build_tests)
add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
add_subdirectory(tests)
endif()
-
diff --git a/utils/bam2bax/makefile b/utils/bam2bax/makefile
index 277e605..d9f1c37 100644
--- a/utils/bam2bax/makefile
+++ b/utils/bam2bax/makefile
@@ -4,6 +4,9 @@ SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
-include ${CURDIR}/../../defines.mk
include ${SRCDIR}/../../rules.mk
+# If pbbam and htslib are prebuilt and included in defines.mk,
+# set PacBioBAM_INCLUDE_DIRS, HTSLIB_INCLUDE_DIRS, PacBioBAM_LIBRARIES
+# and HTSLIB_LIBRARIES as below. Otherwise, set PacBioBAM_RootDir instead.
all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h
@mkdir -p ${CURDIR}/build && \
cd ${CURDIR}/build && \
@@ -25,6 +28,25 @@ all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h ${CURDIR}/tests/src/*.cpp ${CURDIR}/
../ && \
make
+# If pbbam is not prebuilt, just set PacBioBAM_RootDir
+#all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h
+# @mkdir -p ${CURDIR}/build && \
+# cd ${CURDIR}/build && \
+# cmake -DBOOST_ROOT=${BOOST_ROOT} \
+# -DPacBioBAM_RootDir=/home/UNIXHOME/yli/git/depot/software/smrtanalysis/bioinformatics/lib/cpp/pbbam \
+# -DPBDATA_INCLUDE_DIRS=${LIBPBDATA_INC} \
+# -DPBDATA_LIBRARIES=${LIBPBDATA_LIB}/libpbdata${SH_LIB_EXT} \
+# -DPBIHDF_INCLUDE_DIRS=${LIBPBIHDF_INC} \
+# -DPBIHDF_LIBRARIES=${LIBPBIHDF_LIB}/libpbihdf${SH_LIB_EXT} \
+# -DBLASR_INCLUDE_DIRS=${LIBBLASR_INC}/ \
+# -DBLASR_LIBRARIES=${LIBBLASR_LIB}/libblasr${SH_LIB_EXT} \
+# -DHDF5_INCLUDE_DIRS=${HDF5_INC} \
+# -DHDF5_CPP_LIBRARIES=${HDF5_LIB}/libhdf5_cpp${SH_LIB_EXT} \
+# -DHDF5_LIBRARIES=${HDF5_LIB}/libhdf5${SH_LIB_EXT} \
+# -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+# ../ && \
+# make
+
clean:
@rm -rf ${CURDIR}/bin/
@rm -rf ${CURDIR}/build
diff --git a/utils/bam2bax/src/CMakeLists.txt b/utils/bam2bax/src/CMakeLists.txt
index a91dc14..044c303 100644
--- a/utils/bam2bax/src/CMakeLists.txt
+++ b/utils/bam2bax/src/CMakeLists.txt
@@ -51,6 +51,7 @@ if (NOT APPLE)
else()
endif()
target_link_libraries(bam2bax
+ ${PBBAM_LINK_FLAG}
${BLASR_LIBRARIES}
${PBIHDF_LIBRARIES}
${PBDATA_LIBRARIES}
@@ -67,6 +68,7 @@ set_target_properties(bam2plx PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${Bam2Bax_BinDir}
)
target_link_libraries(bam2plx
+ ${PBBAM_LINK_FLAG}
${BLASR_LIBRARIES}
${PBIHDF_LIBRARIES}
${PBDATA_LIBRARIES}
diff --git a/utils/bam2bax/tests/CMakeLists.txt b/utils/bam2bax/tests/CMakeLists.txt
index 86bbe19..15e9527 100644
--- a/utils/bam2bax/tests/CMakeLists.txt
+++ b/utils/bam2bax/tests/CMakeLists.txt
@@ -74,6 +74,7 @@ endif()
target_link_libraries(test_bam2bax
gtest
gtest_main
+ ${PBBAM_LINK_FLAG}
${BLASR_LIBRARIES}
${PBIHDF_LIBRARIES}
${PBDATA_LIBRARIES}
diff --git a/utils/bax2bam/CMakeLists.txt b/utils/bax2bam/CMakeLists.txt
index 14f4697..3e1cc49 100644
--- a/utils/bax2bam/CMakeLists.txt
+++ b/utils/bax2bam/CMakeLists.txt
@@ -65,8 +65,15 @@ endif()
if (NOT PacBioBAM_INCLUDE_DIRS OR NOT PacBioBAM_LIBRARIES
OR NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
- set(PacBioBAM_RootDir ${Bax2Bam_RootDir}/../../../../../lib/cpp/pbbam)
+ set(PacBioBAM_INCLUDE_DIRS )
+ set(PacBioBAM_LIBRARIES )
+ set(HTSLIB_INCLUDE_DIRS )
+ set(HTSLIB_LIBRARIES )
+ if (NOT PacBioBAM_RootDir)
+ message ("Must either set (PacBioBAM_INCLUDE_DIRS, PacBioBAM_LIBRARIES, HTSLIB_INCLUDE_DIRS, and HTSLIB_LIBRARIES) or PacBioBAM_RootDir!")
+ endif()
add_subdirectory(${PacBioBAM_RootDir} external/build/pbbam)
+ set(PBBAM_LINK_FLAG pbbam)
endif()
if (NOT Boost_INCLUDE_DIRS)
diff --git a/utils/bax2bam/makefile b/utils/bax2bam/makefile
index 4c395bb..501a1fe 100644
--- a/utils/bax2bam/makefile
+++ b/utils/bax2bam/makefile
@@ -4,6 +4,9 @@ SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
include ${CURDIR}/../../defines.mk
include ${SRCDIR}/../../rules.mk
+# If pbbam and htslib are prebuilt and included in blasr/defines.mk,
+# set PacBioBAM_INCLUDE_DIRS, HTSLIB_INCLUDE_DIRS, PacBioBAM_LIBRARIES
+# and HTSLIB_LIBRARIES as below. Otherwise, just set PacBioBAM_RootDir instead.
all: ${CURDIR}/src/* ${CURDIR}/tests/src/*
@mkdir -p ${CURDIR}/build && \
cd ${CURDIR}/build && \
@@ -25,6 +28,7 @@ all: ${CURDIR}/src/* ${CURDIR}/tests/src/*
../ && \
make
+
clean:
@rm -rf ${CURDIR}/bin/
@rm -rf ${CURDIR}/build
diff --git a/utils/bax2bam/src/CMakeLists.txt b/utils/bax2bam/src/CMakeLists.txt
index aa8b803..5287f60 100644
--- a/utils/bax2bam/src/CMakeLists.txt
+++ b/utils/bax2bam/src/CMakeLists.txt
@@ -45,6 +45,7 @@ if (NOT APPLE)
else()
endif()
target_link_libraries(bax2bam
+ ${PBBAM_LINK_FLAG}
${BLASR_LIBRARIES}
${PBIHDF_LIBRARIES}
${PBDATA_LIBRARIES}
diff --git a/utils/bax2bam/src/IConverter.cpp b/utils/bax2bam/src/IConverter.cpp
index 8313d5c..df89477 100644
--- a/utils/bax2bam/src/IConverter.cpp
+++ b/utils/bax2bam/src/IConverter.cpp
@@ -24,13 +24,6 @@ BamHeader IConverter::CreateHeader(const string& modeString)
{
BamHeader header;
- // @HD VN:<current SAM/BAM spec version>
- // SO:unsorted
- // pb:<current PacBio BAM spec version>
- header.Version("1.5")
- .SortOrder("unknown")
- .PacBioBamVersion("3.0.2");
-
// @RG ID: <read group ID>
// DS: READTYPE=<HQREGION|POLYMERASE|SUBREAD>[;<Tag Manifest>;BINDINGKIT=<foo>;SEQUENCINGKIT=<bar>;BASECALLERVERSION=<42>]
// PL: PACBIO
diff --git a/utils/bax2bam/src/main.cpp b/utils/bax2bam/src/main.cpp
index 8cbb7e8..7ba68f0 100644
--- a/utils/bax2bam/src/main.cpp
+++ b/utils/bax2bam/src/main.cpp
@@ -65,7 +65,7 @@ int main(int argc, char* argv[])
readModeGroup.add_option("--ccs")
.dest(Settings::Option::ccsMode_)
.action("store_true")
- .help("Output CCS sequences");
+ .help("Output CCS sequences (requires ccs.h5 input)");
parser.add_option_group(readModeGroup);
auto featureGroup = optparse::OptionGroup(parser, "Pulse feature options");
diff --git a/utils/bax2bam/tests/CMakeLists.txt b/utils/bax2bam/tests/CMakeLists.txt
index 4a3680a..7bd5ba1 100644
--- a/utils/bax2bam/tests/CMakeLists.txt
+++ b/utils/bax2bam/tests/CMakeLists.txt
@@ -69,6 +69,7 @@ endif()
target_link_libraries(test_bax2bam
gtest
gtest_main
+ ${PBBAM_LINK_FLAG}
${BLASR_LIBRARIES}
${PBIHDF_LIBRARIES}
${PBDATA_LIBRARIES}
diff --git a/utils/bax2bam/tests/src/TestData.h.in b/utils/bax2bam/tests/src/TestData.h.in
index 53f0a89..c432cda 100644
--- a/utils/bax2bam/tests/src/TestData.h.in
+++ b/utils/bax2bam/tests/src/TestData.h.in
@@ -11,6 +11,8 @@ const std::string Bax2Bam_Exe = std::string("@Bax2Bam_BinDir@/bax2bam");
const std::string Source_Dir = std::string("@Bax2Bam_TestsDir@");
const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
const std::string Data_Dir = std::string("/pbi/dept/secondary/siv/testdata/bax2bam");
+const std::string Header_Version = std::string("1.1");
+const std::string PacBioBam_Version = std::string("3.0.3");
} // namespace tests
diff --git a/utils/bax2bam/tests/src/test_ccs.cpp b/utils/bax2bam/tests/src/test_ccs.cpp
index 4851be0..edade95 100644
--- a/utils/bax2bam/tests/src/test_ccs.cpp
+++ b/utils/bax2bam/tests/src/test_ccs.cpp
@@ -101,9 +101,9 @@ TEST(CcsTest, EndToEnd_Multiple)
// check BAM header information
const BamHeader& header = bamFile.Header();
- EXPECT_EQ(string("1.5"), header.Version());
+ EXPECT_EQ(tests::Header_Version, header.Version());
EXPECT_EQ(string("unknown"), header.SortOrder());
- EXPECT_EQ(string("3.0.2"), header.PacBioBamVersion());
+ EXPECT_EQ(tests::PacBioBam_Version, header.PacBioBamVersion());
EXPECT_TRUE(header.Sequences().empty());
EXPECT_TRUE(header.Comments().empty());
ASSERT_FALSE(header.Programs().empty());
diff --git a/utils/bax2bam/tests/src/test_hqregions.cpp b/utils/bax2bam/tests/src/test_hqregions.cpp
index fba41f1..e922ebf 100644
--- a/utils/bax2bam/tests/src/test_hqregions.cpp
+++ b/utils/bax2bam/tests/src/test_hqregions.cpp
@@ -104,9 +104,9 @@ TEST(HqRegionsTest, EndToEnd_Single)
// check BAM header information
const BamHeader& header = bamFile.Header();
- EXPECT_EQ(string("1.5"), header.Version());
+ EXPECT_EQ(string(tests::Header_Version), header.Version());
EXPECT_EQ(string("unknown"), header.SortOrder());
- EXPECT_EQ(string("3.0.2"), header.PacBioBamVersion());
+ EXPECT_EQ(string(tests::PacBioBam_Version), header.PacBioBamVersion());
EXPECT_TRUE(header.Sequences().empty());
EXPECT_TRUE(header.Comments().empty());
ASSERT_FALSE(header.Programs().empty());
@@ -261,9 +261,9 @@ TEST(HqRegionsTest, EndToEnd_Single)
// check BAM header information
const BamHeader& header = bamFile.Header();
- EXPECT_EQ(string("1.5"), header.Version());
+ EXPECT_EQ(tests::Header_Version, header.Version());
EXPECT_EQ(string("unknown"), header.SortOrder());
- EXPECT_EQ(string("3.0.2"), header.PacBioBamVersion());
+ EXPECT_EQ(tests::PacBioBam_Version, header.PacBioBamVersion());
EXPECT_TRUE(header.Sequences().empty());
EXPECT_TRUE(header.Comments().empty());
ASSERT_FALSE(header.Programs().empty());
diff --git a/utils/bax2bam/tests/src/test_polymerase.cpp b/utils/bax2bam/tests/src/test_polymerase.cpp
index 73c50a6..9c4f91f 100644
--- a/utils/bax2bam/tests/src/test_polymerase.cpp
+++ b/utils/bax2bam/tests/src/test_polymerase.cpp
@@ -87,9 +87,9 @@ TEST(PolymeraseTest, EndToEnd_Single)
// check BAM header information
const BamHeader& header = bamFile.Header();
- EXPECT_EQ(string("1.5"), header.Version());
+ EXPECT_EQ(tests::Header_Version, header.Version());
EXPECT_EQ(string("unknown"), header.SortOrder());
- EXPECT_EQ(string("3.0.2"), header.PacBioBamVersion());
+ EXPECT_EQ(tests::PacBioBam_Version, header.PacBioBamVersion());
EXPECT_TRUE(header.Sequences().empty());
EXPECT_TRUE(header.Comments().empty());
ASSERT_FALSE(header.Programs().empty());
diff --git a/utils/bax2bam/tests/src/test_subreads.cpp b/utils/bax2bam/tests/src/test_subreads.cpp
index 6394f4a..e40f7b1 100644
--- a/utils/bax2bam/tests/src/test_subreads.cpp
+++ b/utils/bax2bam/tests/src/test_subreads.cpp
@@ -199,9 +199,9 @@ TEST(SubreadsTest, EndToEnd_Multiple)
// check BAM header information
const BamHeader& header = bamFile.Header();
- EXPECT_EQ(string("1.5"), header.Version());
+ EXPECT_EQ(tests::Header_Version, header.Version());
EXPECT_EQ(string("unknown"), header.SortOrder());
- EXPECT_EQ(string("3.0.2"), header.PacBioBamVersion());
+ EXPECT_EQ(tests::PacBioBam_Version, header.PacBioBamVersion());
EXPECT_TRUE(header.Sequences().empty());
EXPECT_TRUE(header.Comments().empty());
ASSERT_FALSE(header.Programs().empty());
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/blasr.git
More information about the debian-med-commit
mailing list