[med-svn] [blasr] 01/04: Imported Upstream version 5.3+0

Afif Elghraoui afif at moszumanska.debian.org
Sat Dec 24 01:53:54 UTC 2016


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository blasr.

commit afe0a9b297f64db82f4824b91794f0b538bd763b
Author: Afif Elghraoui <afif at debian.org>
Date:   Fri Dec 23 16:51:42 2016 -0800

    Imported Upstream version 5.3+0
---
 cram.mk                                     | 21 +++++++----
 ctest/aggressiveIntervalCut.t               |  4 +--
 ctest/concordant.t                          | 39 ++++++++++-----------
 ctest/ecoli.t                               |  9 +++--
 ctest/fastMaxInterval.t                     |  4 +--
 ctest/multipart.t                           |  4 +--
 ctest/noSplitSubreads.t                     | 34 ++++++++++++++++++
 ctest/useccsallBestN1.t                     |  2 +-
 iblasr/MappingParameters.h                  | 26 +++++++++++++-
 iblasr/RegisterBlasrOptions.h               | 11 +++---
 makefile                                    |  4 +++
 utils/bam2bax/BUILD.txt                     | 54 ++++++++++++++++++++---------
 utils/bam2bax/CMakeLists.txt                | 10 ++++--
 utils/bam2bax/makefile                      | 22 ++++++++++++
 utils/bam2bax/src/CMakeLists.txt            |  2 ++
 utils/bam2bax/tests/CMakeLists.txt          |  1 +
 utils/bax2bam/CMakeLists.txt                |  9 ++++-
 utils/bax2bam/makefile                      |  4 +++
 utils/bax2bam/src/CMakeLists.txt            |  1 +
 utils/bax2bam/src/IConverter.cpp            |  7 ----
 utils/bax2bam/src/main.cpp                  |  2 +-
 utils/bax2bam/tests/CMakeLists.txt          |  1 +
 utils/bax2bam/tests/src/TestData.h.in       |  2 ++
 utils/bax2bam/tests/src/test_ccs.cpp        |  4 +--
 utils/bax2bam/tests/src/test_hqregions.cpp  |  8 ++---
 utils/bax2bam/tests/src/test_polymerase.cpp |  4 +--
 utils/bax2bam/tests/src/test_subreads.cpp   |  4 +--
 27 files changed, 210 insertions(+), 83 deletions(-)

diff --git a/cram.mk b/cram.mk
index 35f62e8..f6e77d5 100644
--- a/cram.mk
+++ b/cram.mk
@@ -1,18 +1,27 @@
 FAST_CTESTS := \
+ctest/ecoli.t \
+ctest/fastMaxInterval.t \
+ctest/aggressiveIntervalCut.t \
+ctest/multipart.t \
 ctest/affineAlign.t            ctest/bamOut.t           ctest/ccsH5.t           ctest/filtercriteria.t  ctest/m0-5.t \
-ctest/aggressiveIntervalCut.t  ctest/fofn.t             ctest/multipart.t \
+ctest/fofn.t \
 ctest/alignScore.t             ctest/hitpolicy.t       ctest/noSplitSubreads.t \
-ctest/bamIn.t                  ctest/fastMaxInterval.t  ctest/open_fail.t       ctest/verbose.t         ctest/deterministic.t
+ctest/bamIn.t                  ctest/open_fail.t       ctest/verbose.t         ctest/deterministic.t
 
 
 MILD_CTESTS := \
-	ctest/bug25766.t ctest/holeNumbers.t
+	ctest/concordant.t ctest/bug25766.t ctest/holeNumbers.t
 
-SLOW_CTESTS := ctest/bug25328.t ctest/useccsallLargeGenome.t
+SLOW_CTESTS := ctest/bug25328.t
 
 # XXX: following tests sidelined, needs bam input after --sam option removed
-# FAST: ctest/ecoli.t
-# MILD: ctest/useccsallBestN1.t ctest/concordant.t 
+# MILD: ctest/useccsallBestN1.t
+
+
+# sidelined because of changes in directories
+#
+# needed to restore  /mnt/data3/vol53/2450530/0014
+# SLOW ctest/useccsallLargeGenome.t
 
 #BLASR_PATH=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/private/otherbins/internalall/bin/
 #export BLASR_PATH
diff --git a/ctest/aggressiveIntervalCut.t b/ctest/aggressiveIntervalCut.t
index 8743782..7cc0cc7 100644
--- a/ctest/aggressiveIntervalCut.t
+++ b/ctest/aggressiveIntervalCut.t
@@ -3,8 +3,8 @@ Set up
 
 Test --aggressiveIntervalCut.
   $ rm -f $TMP1
-  $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
-  $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
+  $ BASFILE=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/aggressiveIntervalCut/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
+  $ REFFA=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/references/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
   $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --aggressiveIntervalCut
   [INFO] * [blasr] started. (glob)
   [INFO] * [blasr] ended. (glob)
diff --git a/ctest/concordant.t b/ctest/concordant.t
index e0b0225..253d9bf 100644
--- a/ctest/concordant.t
+++ b/ctest/concordant.t
@@ -2,29 +2,13 @@ Set up
   $ . $TESTDIR/setup.sh
 
 Test --concordant
-  $ rm -rf $OUTDIR/concordant_subset.bam
-  $ rm -rf $OUTDIR/concordant_subset.sam
-  $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta --concordant --refineConcordantAlignments --bam --out $OUTDIR/concordant_subset.bam --nproc 12 --holeNumbers 1--10000 --sa $DATDIR/ecoli_reference.sa
+  $ rm -rf $OUTDIR/concordant_subset.sam $OUTDIR/tmp1 $OUTDIR/tmp2
+  $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta --concordant --refineConcordantAlignments -m 4 --out $OUTDIR/concordant_subset.m4 --nproc 12 --holeNumbers 1--10000 --sa $DATDIR/ecoli_reference.sa
   [INFO]* (glob)
   [INFO]* (glob)
-  $ $SAMTOOLS view $OUTDIR/concordant_subset.bam > $OUTDIR/concordant_subset.sam
-  $ sed -n 6,110864p $OUTDIR/concordant_subset.sam > $OUTDIR/tmp1 
-  $ sort $OUTDIR/tmp1 > $OUTDIR/tmp11
-  $ sed -n 6,110864p $STDDIR/$UPDATEDATE/concordant_subset.sam > $OUTDIR/tmp2
-  $ sort $OUTDIR/tmp2 > $OUTDIR/tmp22
-  $ diff $OUTDIR/tmp11 $OUTDIR/tmp22
-  $ rm -rf $OUTDIR/tmp1 $OUTDIR/tmp2 $OUTDIR/tmp11 $OUTDIR/tmp22
-#2014_05_28  --> changelist 135254, use MAX_BAND_SIZE to contrain GuidedAlign
-#2014_08_21  --> changelist 138516, added YS, YE, ZM tags. 
-#2014_08_28  --> changelist 139176, update SAM MD5 
-#2014_09_12  --> changelist 140410, changed the default value of '--concordantTemplate' from 'longestsubread' to 'typicalsubread'
-#2014_09_17  --> changelist 140573, changed SDPFragment LessThan to make sure blasr compiled with gcc 4.4 and 4.8 can produce identical results. 
-#2014_10_16  --> changelist 141378, changed the default value of '--concordantTemplate' from 'typicalsubread' to 'mediansubread'
-#2015_03_01  --> changelist 146599, reads from the same movie should have unique readGroupId
-#2015_03_28  --> changelist 148101, 148080 updated read group id, 148100 updated TLEN
-#2015_04_09  --> changelist 148796, updated read group id
-#2015_04_25  --> changelist 149721, update CIGAR string, replace M with X=.
-#2015_04_25  --> changelist ?, force refine all concordant alignments
+  $ sort $OUTDIR/concordant_subset.m4 > $OUTDIR/tmp1
+Updated in 2016_10_05  --> changed output format from sam to m4, isolate concordant tests from file format tests
+  $ diff $OUTDIR/tmp1 $STDDIR/2016_10_05/concordant_subset.m4
 
 Test --concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping)
   $ FOFN=$DATDIR/FMR1_concordant.fofn
@@ -33,3 +17,16 @@ Test --concordant FMR1 case (the 'typical subread' is selected as template for c
   [INFO]* (glob)
   [INFO]* (glob)
   $ diff $OUTDIR/FMR1_zmw_37927.m4 $STDDIR/$UPDATEDATE/FMR1_zmw_37927.m4
+
+#History
+#2014_05_28  --> changelist 135254, use MAX_BAND_SIZE to constrain GuidedAlign
+#2014_08_21  --> changelist 138516, added YS, YE, ZM tags.
+#2014_08_28  --> changelist 139176, update SAM MD5
+#2014_09_12  --> changelist 140410, changed the default value of '--concordantTemplate' from 'longestsubread' to 'typicalsubread'
+#2014_09_17  --> changelist 140573, changed SDPFragment LessThan to make sure blasr compiled with gcc 4.4 and 4.8 can produce identical results.
+#2014_10_16  --> changelist 141378, changed the default value of '--concordantTemplate' from 'typicalsubread' to 'mediansubread'
+#2015_03_01  --> changelist 146599, reads from the same movie should have unique readGroupId
+#2015_03_28  --> changelist 148101, 148080 updated read group id, 148100 updated TLEN
+#2015_04_09  --> changelist 148796, updated read group id
+#2015_04_25  --> changelist 149721, update CIGAR string, replace M with X=.
+#2015_11_09  --> changelist 167117, added -refineConcordantAlignments
diff --git a/ctest/ecoli.t b/ctest/ecoli.t
index 07940fc..c682178 100644
--- a/ctest/ecoli.t
+++ b/ctest/ecoli.t
@@ -13,10 +13,9 @@ Test blasr with --bam
   [INFO]* (glob)
   [INFO]* (glob)
 
-  $ $SAMTOOLS view $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam
-  $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1--11 > $TMP1
-  $ sed -n '5,$ p' $STDDIR/$UPDATEDATE/ecoli_subset.sam | sort | cut -f 1--11 > $TMP2
-  $ diff $TMP1 $TMP2
-  $ rm $TMP1 $TMP2
+  $ $SAMTOOLS view -h $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam
+  $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1-11 > $OUTDIR/ecoli_subset_out
+  $ sed -n '5,$ p' $STDDIR/2016_10_20/ecoli_subset.sam | sort | cut -f 1-11 > $OUTDIR/ecoli_subset_std
+  $ diff $OUTDIR/ecoli_subset_out $OUTDIR/ecoli_subset_std
 # 2015_03_08 --> changelist 148101, 148080 updated read group id; 148100 updated TLEN
 # 2015_04_09 --> changelist 148796, updated read group id
diff --git a/ctest/fastMaxInterval.t b/ctest/fastMaxInterval.t
index 67a687b..1d8816b 100644
--- a/ctest/fastMaxInterval.t
+++ b/ctest/fastMaxInterval.t
@@ -3,8 +3,8 @@ Set up
 
 Test --fastMaxInterval.
   $ rm -f $TMP1
-  $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
-  $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
+  $ BASFILE=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/aggressiveIntervalCut/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
+  $ REFFA=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/references/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
   $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --fastMaxInterval
   [INFO] * [blasr] started. (glob)
   [INFO] * [blasr] ended. (glob)
diff --git a/ctest/multipart.t b/ctest/multipart.t
index 5e824d7..ed0e031 100644
--- a/ctest/multipart.t
+++ b/ctest/multipart.t
@@ -4,8 +4,8 @@ Set up
 Test input.fofn containing a new bas.h5 file. Note that the new bas.h5 file does not 
 contain any /PulseData, instead contains /MultiPart/Parts.
   $ rm -f $TMP1
-  $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
-  $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
+  $ BASFILE=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/aggressiveIntervalCut/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5
+  $ REFFA=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/references/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta
   $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1
   [INFO] * [blasr] started. (glob)
   [INFO] * [blasr] ended. (glob)
diff --git a/ctest/noSplitSubreads.t b/ctest/noSplitSubreads.t
index ed4a192..5880288 100644
--- a/ctest/noSplitSubreads.t
+++ b/ctest/noSplitSubreads.t
@@ -16,3 +16,37 @@ Test blasr with --noSplitSubreads
   [INFO]* (glob)
   $ sort $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 > $OUTDIR/lambda_bax_noSplitSubreads_subset.m4
   $ diff $OUTDIR/lambda_bax_noSplitSubreads_subset.m4 $STDDIR/lambda_bax_noSplitSubreads_subset.m4
+
+# Test key command of unrolled resequencing, check bam header and alignments in output
+  $ outbam=$OUTDIR/unrolled-4mer.bam
+  $ outsam=$OUTDIR/unrolled-4mer.sam
+  $ query=$DATDIR/unrolled/m54006_151021_185942.subreadset.xml
+  $ ref=$DATDIR/unrolled/All4mer_V2_11_V2_13_V2_15_V2_44_circular_72x_l50256.fasta
+  $ stdsam=$STDDIR/unrolled-4mer.sam
+  $ rm -rf $outbam $outsam
+  $ $EXEC $query $ref --out $outbam --noSplitSubreads --fastMaxInterval --bam
+  [INFO]* (glob)
+  [INFO]* (glob)
+  $ $SAMTOOLS view -h $outbam -o $outsam
+  $ grep -v '^@PG' $outsam > $TMP1 && grep -v '^@PG' $stdsam > $TMP2 && diff $TMP1 $TMP2
+  $ grep '@RG' $outsam
+  @RG\tID:e6043908* (glob)
+  $ grep 'RG:Z:e6043908' $outsam |wc -l
+  4
+
+
+  $ query=$DATDIR/unrolled/m54006_151021_185942.subreads.bam
+  $ outbam=$OUTDIR/unrolled-4mer-bam-in.bam
+  $ outsam=$OUTDIR/unrolled-4mer-bam-in.sam
+  $ rm -rf $outbam $outsam
+  $ $EXEC $query $ref --out $outbam --noSplitSubreads --fastMaxInterval --bam
+  [INFO]* (glob)
+  [INFO]* (glob)
+  $ $SAMTOOLS view -h $outbam -o $outsam
+  $ grep -v '^@PG' $outsam > $TMP1 && grep -v '^@PG' $stdsam > $TMP2 && diff $TMP1 $TMP2
+  $ grep '@RG' $outsam
+  @RG\tID:e6043908* (glob)
+  $ grep 'RG:Z:e6043908' $outsam |wc -l
+  4
+
+
diff --git a/ctest/useccsallBestN1.t b/ctest/useccsallBestN1.t
index 659b3b8..950ba0b 100644
--- a/ctest/useccsallBestN1.t
+++ b/ctest/useccsallBestN1.t
@@ -5,7 +5,7 @@ Test --useccsall with bestn = 1
   $ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta --bestn 1 --useccsall --bam --out $OUTDIR/useccsall.bam --holeNumbers 76772
   [INFO]* (glob)
   [INFO]* (glob)
-  $ $SAMTOOLS view $OUTDIR/useccsall.bam > $OUTDIR/useccsall.sam
+  $ $SAMTOOLS view -h $OUTDIR/useccsall.bam > $OUTDIR/useccsall.sam
   $ sed -n '9,$ p' $OUTDIR/useccsall.sam |cut -f 1-4 > $TMP1
   $ sed -n '9,$ p' $STDDIR/$UPDATEDATE/useccsall.sam | cut -f 1-4 > $TMP2
   $ diff $TMP1 $TMP2
diff --git a/iblasr/MappingParameters.h b/iblasr/MappingParameters.h
index 271fce4..df656a9 100644
--- a/iblasr/MappingParameters.h
+++ b/iblasr/MappingParameters.h
@@ -142,7 +142,9 @@ public:
     bool refineConcordantAlignments;
     int  flankSize;
     bool useRegionTable;
+    bool setIgnoreRegions;
     bool useHQRegionTable;
+    bool setIgnoreHQRegions;
     bool printUnaligned;
     bool noPrintUnalignedSeqs; // print unaligned reads names only.
     string unalignedFileName;
@@ -311,7 +313,9 @@ public:
         refineConcordantAlignments=false;
         flankSize=40;
         useRegionTable = true;
+        setIgnoreRegions = false;
         useHQRegionTable=true;
+        setIgnoreHQRegions = false;
         printUnaligned = false;
         unalignedFileName = "";
         noPrintUnalignedSeqs = false;
@@ -454,6 +458,9 @@ public:
         //
         // Fix all logical incompatibilities with parameters.
         //
+        if (setIgnoreRegions) { useRegionTable = false; }
+        if (setIgnoreHQRegions) { useHQRegionTable = false; }
+
         if (nowarp) {
             warp = false;
         }
@@ -534,6 +541,14 @@ public:
             useRegionTable = true;
             readSeparateRegionTable = true;
         }
+
+        bool isHDFFile = (queryFileType == FileType::HDFPulse or
+                          queryFileType == FileType::HDFBase or
+                          queryFileType == FileType::HDFCCSONLY);
+        if ((setIgnoreRegions or setIgnoreHQRegions) and not isHDFFile) {
+            cout << "ERROR: query must be HDF files in order to set ignoreRegions or ignoreHQRegions." << std::endl;
+            exit(1);
+        }
         if (ccsFofnFileName != "") {
             readSeparateCcsFofn = true;
         }
@@ -699,6 +714,11 @@ public:
             return ReadType::CCS;
         }
         if (queryFileType == FileType::PBBAM) {
+            if (not mapSubreadsSeparately) {
+                // special case: blasr subread.bam ref.fa --noSplitSubreads
+                // input type looks like subread but is in fact polymerase
+                return ReadType::POLYMERASE;
+            }
             // Read type in BAM may be CCS, SUBREAD, HQREGION or POLYMERASE.
             // Determine it later.
             return ReadType::UNKNOWN;
@@ -706,7 +726,11 @@ public:
         if (mapSubreadsSeparately) {
             return ReadType::SUBREAD;
         } else {
-            if (useHQRegionTable) {
+            if (useHQRegionTable and
+                (queryFileType == FileType::HDFCCSONLY or
+                 queryFileType == FileType::HDFBase or
+                 queryFileType == FileType::HDFPulse)) {
+                // Only HDF files can contain region table.
                 return ReadType::HQREGION;
             } else {
                 return ReadType::POLYMERASE;
diff --git a/iblasr/RegisterBlasrOptions.h b/iblasr/RegisterBlasrOptions.h
index af7770a..f67e788 100644
--- a/iblasr/RegisterBlasrOptions.h
+++ b/iblasr/RegisterBlasrOptions.h
@@ -83,8 +83,8 @@ void RegisterBlasrOptions(CommandLineParser & clp, MappingParameters & params) {
     clp.RegisterIntOption("-maxScore", &params.maxScore, "", CommandLineParser::Integer);
     clp.RegisterStringOption("-bwt", &params.bwtFileName, "");
     clp.RegisterIntOption("m", &params.printFormat, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("-sam", &params.printSAM, "");
 #ifdef USE_PBBAM
+    clp.RegisterFlagOption("-sam", &params.printSAM, "");
     clp.RegisterFlagOption("-bam", &params.printBAM, "");
 #endif
     clp.RegisterStringOption("-clipping", &params.clippingString, "");
@@ -135,8 +135,9 @@ void RegisterBlasrOptions(CommandLineParser & clp, MappingParameters & params) {
     clp.RegisterIntOption("-flankSize", &params.flankSize, "", CommandLineParser::NonNegativeInteger);
     clp.RegisterStringOption("-titleTable", &params.titleTableName, "");
     clp.RegisterFlagOption("-useSensitiveSearch", &params.doSensitiveSearch, "");
-    clp.RegisterFlagOption("-ignoreRegions", &params.useRegionTable, "");
-    clp.RegisterFlagOption("-ignoreHQRegions", &params.useHQRegionTable, "");
+    // ignoreRegions or ignoreHQRegions implies region table must exist (i.e., query is HDF).
+    clp.RegisterFlagOption("-ignoreRegions", &params.setIgnoreRegions, "");
+    clp.RegisterFlagOption("-ignoreHQRegions", &params.setIgnoreHQRegions, "");
     clp.RegisterFlagOption("-computeAlignProbability", &params.computeAlignProbability, "");
     clp.RegisterStringOption("-unaligned", &params.unalignedFileName, "");
     // Print unaligned reads names only
@@ -289,8 +290,8 @@ const string BlasrHelp(MappingParameters & params) {
              << "               Use no/hard/subread/soft clipping, ONLY for SAM/BAM output."<< endl
              << "   --printSAMQV (false)" << endl
              << "               Print quality values to SAM output." << endl
-             << "   --cigarUseSeqMatch (false)" << endl
-             << "               CIGAR strings in SAM/BAM output use '=' and 'X' to represent sequence match and mismatch instead of 'M'." << endl << endl
+//             << "   --cigarUseSeqMatch (false)" << endl
+//             << "               CIGAR strings in SAM/BAM output use '=' and 'X' to represent sequence match and mismatch instead of 'M'." << endl << endl
              << " Options for anchoring alignment regions. This will have the greatest effect on speed and sensitivity." << endl
              << "   --minMatch m (12) " << endl
              << "               Minimum seed length.  Higher minMatch will speed up alignment, " << endl
diff --git a/makefile b/makefile
index e2d53fc..01b2072 100644
--- a/makefile
+++ b/makefile
@@ -76,6 +76,10 @@ cramfast: blasr utils
 	${MAKE} -f cram.mk cramfast
 	${MAKE} -C utils cramfast
 
+crammild: blasr utils
+	${MAKE} -f cram.mk crammild
+	${MAKE} -C utils crammild
+
 gtest: blasr
 	# This requires the submodule to be configured with gtest.
 	${MAKE} -C libcpp gtest
diff --git a/utils/bam2bax/BUILD.txt b/utils/bam2bax/BUILD.txt
index 09539dc..bdb9476 100644
--- a/utils/bam2bax/BUILD.txt
+++ b/utils/bam2bax/BUILD.txt
@@ -9,6 +9,11 @@ Assuming that blasr and blaser_libcpp is placed under //depot/software/smrtanaly
 
 
 Build instructions for users:
+
+  If pbbam and htslib are prebuilt and included in blasr/defines.mk,
+  set PacBioBAM_INCLUDE_DIRS, HTSLIB_INCLUDE_DIRS, PacBioBAM_LIBRARIES
+  and HTSLIB_LIBRARIES as below. Otherwise, set PacBioBAM_RootDir instead.
+
   $ cd <bam2bax>
   $ mkdir build; cd build; 
   $ cmake -DPacBioBAM_INCLUDE_DIRS=<path_to_include_dir> \
@@ -26,23 +31,38 @@ Build instructions for users:
       -DHDF5_LIBRARIES=<path_to_lib_so_or_a> \
       -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
       ../
-
   $ make
   $ ../tests/bin/test_bam2bax # to test bam2bax exe
 
-# e.g.,
-#cmake -DPacBioBAM_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/pbbam/include \
-#      -DHTSLIB_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/htslib \
-#      -DPacBioBAM_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/pbbam/lib/libpbbam.a \
-#      -DHTSLIB_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/htslib/libhts.a \
-#      -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
-#      -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
-#      -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
-#      -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
-#      -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
-#      -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
-#      -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
-#      -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
-#      -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
-#      -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
-#      ../
+
+  An example:
+  $ cmake -DPacBioBAM_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/pbbam/include \
+      -DHTSLIB_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/htslib \
+      -DPacBioBAM_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/pbbam/lib/libpbbam.a \
+      -DHTSLIB_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/htslib/libhts.a \
+      -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
+      -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
+      -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
+      -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
+      -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
+      -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
+      -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
+      -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
+      -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
+      -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+      ../
+
+  Alternatively:
+  $ cmake \
+      -DPacBioBAM_RootDir=$smrtanalysis/bioinformatics/lib/cpp/pbbam \
+      -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
+      -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
+      -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
+      -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
+      -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
+      -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
+      -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
+      -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
+      -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
+      -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+      ../
diff --git a/utils/bam2bax/CMakeLists.txt b/utils/bam2bax/CMakeLists.txt
index 09e5990..3baa14d 100644
--- a/utils/bam2bax/CMakeLists.txt
+++ b/utils/bam2bax/CMakeLists.txt
@@ -66,8 +66,15 @@ endif()
 
 if (NOT PacBioBAM_INCLUDE_DIRS OR NOT PacBioBAM_LIBRARIES
     OR NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
-    set(PacBioBAM_RootDir ${Bam2Bax_RootDir}/../../../../../lib/cpp/pbbam)
+    set(PacBioBAM_LIBRARIES )
+    set(PacBioBAM_INCLUDE_DIRS )
+    set(HTSLIB_INCLUDE_DIRS )
+    set(HTSLIB_LIBRARIES )
+    if (NOT PacBioBAM_RootDir)
+        message ("Must either set (PacBioBAM_INCLUDE_DIRS, PacBioBAM_LIBRARIES, HTSLIB_INCLUDE_DIRS, and HTSLIB_LIBRARIES) or PacBioBAM_RootDir!")
+    endif()
     add_subdirectory(${PacBioBAM_RootDir} external/build/pbbam)
+    set(PBBAM_LINK_FLAG pbbam)
 endif()
 
 if (NOT Boost_INCLUDE_DIRS)
@@ -130,4 +137,3 @@ if(Bam2Bax_build_tests)
     add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
     add_subdirectory(tests)
 endif()
-
diff --git a/utils/bam2bax/makefile b/utils/bam2bax/makefile
index 277e605..d9f1c37 100644
--- a/utils/bam2bax/makefile
+++ b/utils/bam2bax/makefile
@@ -4,6 +4,9 @@ SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
 -include ${CURDIR}/../../defines.mk
 include ${SRCDIR}/../../rules.mk
 
+# If pbbam and htslib are prebuilt and included in defines.mk,
+# set PacBioBAM_INCLUDE_DIRS, HTSLIB_INCLUDE_DIRS, PacBioBAM_LIBRARIES
+# and HTSLIB_LIBRARIES as below. Otherwise, set PacBioBAM_RootDir instead.
 all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h  ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h
 	@mkdir -p ${CURDIR}/build && \
 	 cd ${CURDIR}/build && \
@@ -25,6 +28,25 @@ all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h  ${CURDIR}/tests/src/*.cpp ${CURDIR}/
           ../ && \
 		make
 
+# If pbbam is not prebuilt, just set PacBioBAM_RootDir
+#all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h  ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h
+#	@mkdir -p ${CURDIR}/build && \
+#	 cd ${CURDIR}/build && \
+#		cmake -DBOOST_ROOT=${BOOST_ROOT} \
+#          -DPacBioBAM_RootDir=/home/UNIXHOME/yli/git/depot/software/smrtanalysis/bioinformatics/lib/cpp/pbbam \
+#          -DPBDATA_INCLUDE_DIRS=${LIBPBDATA_INC} \
+#          -DPBDATA_LIBRARIES=${LIBPBDATA_LIB}/libpbdata${SH_LIB_EXT} \
+#          -DPBIHDF_INCLUDE_DIRS=${LIBPBIHDF_INC} \
+#          -DPBIHDF_LIBRARIES=${LIBPBIHDF_LIB}/libpbihdf${SH_LIB_EXT} \
+#          -DBLASR_INCLUDE_DIRS=${LIBBLASR_INC}/ \
+#          -DBLASR_LIBRARIES=${LIBBLASR_LIB}/libblasr${SH_LIB_EXT} \
+#          -DHDF5_INCLUDE_DIRS=${HDF5_INC} \
+#          -DHDF5_CPP_LIBRARIES=${HDF5_LIB}/libhdf5_cpp${SH_LIB_EXT} \
+#          -DHDF5_LIBRARIES=${HDF5_LIB}/libhdf5${SH_LIB_EXT} \
+#          -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+#          ../ && \
+#		make
+
 clean:
 	@rm -rf ${CURDIR}/bin/
 	@rm -rf ${CURDIR}/build
diff --git a/utils/bam2bax/src/CMakeLists.txt b/utils/bam2bax/src/CMakeLists.txt
index a91dc14..044c303 100644
--- a/utils/bam2bax/src/CMakeLists.txt
+++ b/utils/bam2bax/src/CMakeLists.txt
@@ -51,6 +51,7 @@ if (NOT APPLE)
 else()
 endif()
 target_link_libraries(bam2bax
+    ${PBBAM_LINK_FLAG}
     ${BLASR_LIBRARIES}
     ${PBIHDF_LIBRARIES}
     ${PBDATA_LIBRARIES}
@@ -67,6 +68,7 @@ set_target_properties(bam2plx PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY ${Bam2Bax_BinDir}
 )
 target_link_libraries(bam2plx
+    ${PBBAM_LINK_FLAG}
     ${BLASR_LIBRARIES}
     ${PBIHDF_LIBRARIES}
     ${PBDATA_LIBRARIES}
diff --git a/utils/bam2bax/tests/CMakeLists.txt b/utils/bam2bax/tests/CMakeLists.txt
index 86bbe19..15e9527 100644
--- a/utils/bam2bax/tests/CMakeLists.txt
+++ b/utils/bam2bax/tests/CMakeLists.txt
@@ -74,6 +74,7 @@ endif()
 target_link_libraries(test_bam2bax
     gtest
     gtest_main
+    ${PBBAM_LINK_FLAG}
     ${BLASR_LIBRARIES}
     ${PBIHDF_LIBRARIES}
     ${PBDATA_LIBRARIES}
diff --git a/utils/bax2bam/CMakeLists.txt b/utils/bax2bam/CMakeLists.txt
index 14f4697..3e1cc49 100644
--- a/utils/bax2bam/CMakeLists.txt
+++ b/utils/bax2bam/CMakeLists.txt
@@ -65,8 +65,15 @@ endif()
 
 if (NOT PacBioBAM_INCLUDE_DIRS OR NOT PacBioBAM_LIBRARIES
     OR NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
-    set(PacBioBAM_RootDir  ${Bax2Bam_RootDir}/../../../../../lib/cpp/pbbam)
+    set(PacBioBAM_INCLUDE_DIRS )
+    set(PacBioBAM_LIBRARIES )
+    set(HTSLIB_INCLUDE_DIRS )
+    set(HTSLIB_LIBRARIES )
+    if (NOT PacBioBAM_RootDir)
+        message ("Must either set (PacBioBAM_INCLUDE_DIRS, PacBioBAM_LIBRARIES, HTSLIB_INCLUDE_DIRS, and HTSLIB_LIBRARIES) or PacBioBAM_RootDir!")
+    endif()
     add_subdirectory(${PacBioBAM_RootDir} external/build/pbbam)
+    set(PBBAM_LINK_FLAG pbbam)
 endif()
 
 if (NOT Boost_INCLUDE_DIRS)
diff --git a/utils/bax2bam/makefile b/utils/bax2bam/makefile
index 4c395bb..501a1fe 100644
--- a/utils/bax2bam/makefile
+++ b/utils/bax2bam/makefile
@@ -4,6 +4,9 @@ SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
 include ${CURDIR}/../../defines.mk
 include ${SRCDIR}/../../rules.mk
 
+# If pbbam and htslib are prebuilt and included in blasr/defines.mk,
+# set PacBioBAM_INCLUDE_DIRS, HTSLIB_INCLUDE_DIRS, PacBioBAM_LIBRARIES
+# and HTSLIB_LIBRARIES as below. Otherwise, just set PacBioBAM_RootDir instead.
 all: ${CURDIR}/src/* ${CURDIR}/tests/src/*
 	@mkdir -p ${CURDIR}/build && \
 	 cd ${CURDIR}/build && \
@@ -25,6 +28,7 @@ all: ${CURDIR}/src/* ${CURDIR}/tests/src/*
           ../ && \
 		make
 
+
 clean:
 	@rm -rf ${CURDIR}/bin/
 	@rm -rf ${CURDIR}/build
diff --git a/utils/bax2bam/src/CMakeLists.txt b/utils/bax2bam/src/CMakeLists.txt
index aa8b803..5287f60 100644
--- a/utils/bax2bam/src/CMakeLists.txt
+++ b/utils/bax2bam/src/CMakeLists.txt
@@ -45,6 +45,7 @@ if (NOT APPLE)
 else()
 endif()
 target_link_libraries(bax2bam 
+    ${PBBAM_LINK_FLAG}
     ${BLASR_LIBRARIES}
     ${PBIHDF_LIBRARIES}
     ${PBDATA_LIBRARIES} 
diff --git a/utils/bax2bam/src/IConverter.cpp b/utils/bax2bam/src/IConverter.cpp
index 8313d5c..df89477 100644
--- a/utils/bax2bam/src/IConverter.cpp
+++ b/utils/bax2bam/src/IConverter.cpp
@@ -24,13 +24,6 @@ BamHeader IConverter::CreateHeader(const string& modeString)
 {
     BamHeader header;
 
-    // @HD VN:<current SAM/BAM spec version>
-    //     SO:unsorted
-    //     pb:<current PacBio BAM spec version>
-    header.Version("1.5")
-          .SortOrder("unknown")
-          .PacBioBamVersion("3.0.2");
-
     // @RG ID: <read group ID>
     //     DS: READTYPE=<HQREGION|POLYMERASE|SUBREAD>[;<Tag Manifest>;BINDINGKIT=<foo>;SEQUENCINGKIT=<bar>;BASECALLERVERSION=<42>]
     //     PL: PACBIO
diff --git a/utils/bax2bam/src/main.cpp b/utils/bax2bam/src/main.cpp
index 8cbb7e8..7ba68f0 100644
--- a/utils/bax2bam/src/main.cpp
+++ b/utils/bax2bam/src/main.cpp
@@ -65,7 +65,7 @@ int main(int argc, char* argv[])
     readModeGroup.add_option("--ccs")
                  .dest(Settings::Option::ccsMode_)
                  .action("store_true")
-                 .help("Output CCS sequences");
+                 .help("Output CCS sequences (requires ccs.h5 input)");
     parser.add_option_group(readModeGroup);
 
     auto featureGroup = optparse::OptionGroup(parser, "Pulse feature options");
diff --git a/utils/bax2bam/tests/CMakeLists.txt b/utils/bax2bam/tests/CMakeLists.txt
index 4a3680a..7bd5ba1 100644
--- a/utils/bax2bam/tests/CMakeLists.txt
+++ b/utils/bax2bam/tests/CMakeLists.txt
@@ -69,6 +69,7 @@ endif()
 target_link_libraries(test_bax2bam
     gtest
     gtest_main
+    ${PBBAM_LINK_FLAG}
     ${BLASR_LIBRARIES}
     ${PBIHDF_LIBRARIES}
     ${PBDATA_LIBRARIES} 
diff --git a/utils/bax2bam/tests/src/TestData.h.in b/utils/bax2bam/tests/src/TestData.h.in
index 53f0a89..c432cda 100644
--- a/utils/bax2bam/tests/src/TestData.h.in
+++ b/utils/bax2bam/tests/src/TestData.h.in
@@ -11,6 +11,8 @@ const std::string Bax2Bam_Exe  = std::string("@Bax2Bam_BinDir@/bax2bam");
 const std::string Source_Dir   = std::string("@Bax2Bam_TestsDir@");
 const std::string Bin_Dir      = std::string("@CMAKE_CURRENT_BINARY_DIR@");
 const std::string Data_Dir     = std::string("/pbi/dept/secondary/siv/testdata/bax2bam");
+const std::string Header_Version = std::string("1.1");
+const std::string PacBioBam_Version = std::string("3.0.3");
 
 } // namespace tests
 
diff --git a/utils/bax2bam/tests/src/test_ccs.cpp b/utils/bax2bam/tests/src/test_ccs.cpp
index 4851be0..edade95 100644
--- a/utils/bax2bam/tests/src/test_ccs.cpp
+++ b/utils/bax2bam/tests/src/test_ccs.cpp
@@ -101,9 +101,9 @@ TEST(CcsTest, EndToEnd_Multiple)
 
         // check BAM header information
         const BamHeader& header = bamFile.Header();
-        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(tests::Header_Version,     header.Version());
         EXPECT_EQ(string("unknown"), header.SortOrder());
-        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_EQ(tests::PacBioBam_Version,   header.PacBioBamVersion());
         EXPECT_TRUE(header.Sequences().empty());
         EXPECT_TRUE(header.Comments().empty());
         ASSERT_FALSE(header.Programs().empty());
diff --git a/utils/bax2bam/tests/src/test_hqregions.cpp b/utils/bax2bam/tests/src/test_hqregions.cpp
index fba41f1..e922ebf 100644
--- a/utils/bax2bam/tests/src/test_hqregions.cpp
+++ b/utils/bax2bam/tests/src/test_hqregions.cpp
@@ -104,9 +104,9 @@ TEST(HqRegionsTest, EndToEnd_Single)
 
         // check BAM header information
         const BamHeader& header = bamFile.Header();
-        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(string(tests::Header_Version),     header.Version());
         EXPECT_EQ(string("unknown"), header.SortOrder());
-        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_EQ(string(tests::PacBioBam_Version),   header.PacBioBamVersion());
         EXPECT_TRUE(header.Sequences().empty());
         EXPECT_TRUE(header.Comments().empty());
         ASSERT_FALSE(header.Programs().empty());
@@ -261,9 +261,9 @@ TEST(HqRegionsTest, EndToEnd_Single)
 
         // check BAM header information
         const BamHeader& header = bamFile.Header();
-        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(tests::Header_Version,     header.Version());
         EXPECT_EQ(string("unknown"), header.SortOrder());
-        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_EQ(tests::PacBioBam_Version,   header.PacBioBamVersion());
         EXPECT_TRUE(header.Sequences().empty());
         EXPECT_TRUE(header.Comments().empty());
         ASSERT_FALSE(header.Programs().empty());
diff --git a/utils/bax2bam/tests/src/test_polymerase.cpp b/utils/bax2bam/tests/src/test_polymerase.cpp
index 73c50a6..9c4f91f 100644
--- a/utils/bax2bam/tests/src/test_polymerase.cpp
+++ b/utils/bax2bam/tests/src/test_polymerase.cpp
@@ -87,9 +87,9 @@ TEST(PolymeraseTest, EndToEnd_Single)
 
         // check BAM header information
         const BamHeader& header = bamFile.Header();
-        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(tests::Header_Version,     header.Version());
         EXPECT_EQ(string("unknown"), header.SortOrder());
-        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_EQ(tests::PacBioBam_Version,  header.PacBioBamVersion());
         EXPECT_TRUE(header.Sequences().empty());
         EXPECT_TRUE(header.Comments().empty());
         ASSERT_FALSE(header.Programs().empty());
diff --git a/utils/bax2bam/tests/src/test_subreads.cpp b/utils/bax2bam/tests/src/test_subreads.cpp
index 6394f4a..e40f7b1 100644
--- a/utils/bax2bam/tests/src/test_subreads.cpp
+++ b/utils/bax2bam/tests/src/test_subreads.cpp
@@ -199,9 +199,9 @@ TEST(SubreadsTest, EndToEnd_Multiple)
 
         // check BAM header information
         const BamHeader& header = bamFile.Header();
-        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(tests::Header_Version,     header.Version());
         EXPECT_EQ(string("unknown"), header.SortOrder());
-        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_EQ(tests::PacBioBam_Version,  header.PacBioBamVersion());
         EXPECT_TRUE(header.Sequences().empty());
         EXPECT_TRUE(header.Comments().empty());
         ASSERT_FALSE(header.Programs().empty());

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/blasr.git



More information about the debian-med-commit mailing list