[med-svn] [pbseqlib] 01/04: Imported Upstream version 0~20161219

Tue Dec 20 09:01:09 UTC 2016

This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository pbseqlib.

commit 849e1e0c48d4426de32561e5840a4c13d39b5f9a
Author: Afif Elghraoui <afif at debian.org>
Date:   Mon Dec 19 23:46:45 2016 -0800

    Imported Upstream version 0~20161219
---
 .../algorithms/sorting/LightweightSuffixArray.cpp  | 10 ++--
 alignment/files/ReaderAgglomerate.cpp              |  8 ++--
 alignment/files/ReaderAgglomerate.hpp              |  7 ++-
 alignment/format/BAMPrinter.hpp                    |  2 +-
 alignment/format/BAMPrinterImpl.hpp                |  2 +-
 alignment/format/SAMHeaderPrinter.cpp              | 56 +++++++++-------------
 hdf/HDFZMWWriter.cpp                               |  4 +-
 pbdata/SMRTSequence.cpp                            |  4 +-
 8 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/alignment/algorithms/sorting/LightweightSuffixArray.cpp b/alignment/algorithms/sorting/LightweightSuffixArray.cpp
index cb881ed..bc57ecf 100644
--- a/alignment/algorithms/sorting/LightweightSuffixArray.cpp
+++ b/alignment/algorithms/sorting/LightweightSuffixArray.cpp
@@ -286,7 +286,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
         }
     }
     UInt dSetSize = dIndex;
-    std::cerr << "Sorting " << diffCoverSize << "-prefixes of the genome." << std::endl;
+    std::cout << "Sorting " << diffCoverSize << "-prefixes of the genome." << std::endl;
     MediankeyBoundedQuicksort(text, index, dIndex, 0, dSetSize, 0, diffCoverSize);
     UInt i;
 
@@ -300,7 +300,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
     DiffCoverMu mu;
     mu.Initialize(diffCover, diffCoverLength, diffCoverSize, textLength);
     UInt largestLexName;
-    std::cerr << "Enumerating " << diffCoverSize << "-prefixes." << std::endl;
+    std::cout << "Enumerating " << diffCoverSize << "-prefixes." << std::endl;
     largestLexName = DiffCoverBuildLexNaming(text, textLength,
             index, dSetSize, diffCoverLength, diffCoverSize, 
             mu.diffCoverReverseLookup, lexVNaming);
@@ -348,7 +348,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
     // [0,n-v], the relative order of the suffixes starting at
     // i+\delta(,j) and j+\delta(i,j) is already known.
     //
-    std::cerr << "Sorting suffices." << std::endl;
+    std::cout << "Sorting suffices." << std::endl;
     // Step 2.1 v-order suffices using multikey quicksort
     for (i = 0; i < textLength; i++ ){
         index[i] = i;
@@ -369,14 +369,14 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
     lOrderComparator.diffCoverReverseLookup = mu.diffCoverReverseLookup;
     UInt setBegin, setEnd;
     setBegin = setEnd = 0;
-    std::cerr << "Sorting buckets." << std::endl;
+    std::cout << "Sorting buckets." << std::endl;
     int percentDone = 0;
     int curPercentage = 0;
     while(setBegin < textLength) {
         setEnd = setBegin;
         percentDone = (int)(((1.0*setBegin) / textLength) * 100);
         if ( percentDone > curPercentage) {
-            std::cerr << " " << percentDone << "% of buckets sorted."  << std::endl;
+            std::cout << " " << percentDone << "% of buckets sorted."  << std::endl;
             curPercentage = percentDone;
         }
         while(setEnd < textLength and
diff --git a/alignment/files/ReaderAgglomerate.cpp b/alignment/files/ReaderAgglomerate.cpp
index 26d29b3..d9ef711 100644
--- a/alignment/files/ReaderAgglomerate.cpp
+++ b/alignment/files/ReaderAgglomerate.cpp
@@ -253,7 +253,7 @@ int ReaderAgglomerate::Initialize(bool unrolled_mode) {
             if (unrolled) {
                 if (fileType == FileType::PBBAM) {
                     // Handle PBBAM here , use scrapFileName 
-                    VPReader = new PacBio::BAM::VirtualPolymeraseReader(fileName, scrapsFileName);
+                    VPReader = new PacBio::BAM::ZmwReadStitcher(fileName, scrapsFileName);
                     assert(VPReader != nullptr);
                     // 
                 } else if (fileType == FileType::PBDATASET) {
@@ -261,7 +261,7 @@ int ReaderAgglomerate::Initialize(bool unrolled_mode) {
                     // No need in setting filters for PolymeraseReads
                     // prefiltering, in a form it is currently implemented migght crate Polymerase reads
                     // with skipped subreads, which defies the whole purpose of unrolled mode 
-		    VPCReader = new PacBio::BAM::VirtualPolymeraseCompositeReader(*dataSetPtr);
+		    VPCReader = new PacBio::BAM::ZmwReadStitcher(*dataSetPtr);
                     assert(VPCReader != nullptr);
                 }
             }
@@ -506,7 +506,7 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) {
                 if ( VPCReader->HasNext() ) {
                     // TODO check for length mismatch (as temporary fix)
 
-                    PacBio::BAM::VirtualPolymeraseBamRecord record = VPCReader->Next();
+                    PacBio::BAM::VirtualZmwBamRecord record = VPCReader->Next();
 
                     numRecords = 1;   // a single record only 
                     seq.Copy(record); // need to copy into seq
@@ -525,7 +525,7 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) {
                 if ( VPReader->HasNext() ) {
                     // TODO check for length mismatch (as temporary fix)
 
-                    PacBio::BAM::VirtualPolymeraseBamRecord record = VPReader->Next();
+                    PacBio::BAM::VirtualZmwBamRecord record = VPReader->Next();
 
                     numRecords = 1;   // a single record only 
                     seq.Copy(record); // need to copy into seq
diff --git a/alignment/files/ReaderAgglomerate.hpp b/alignment/files/ReaderAgglomerate.hpp
index 36fd83c..9db65d3 100644
--- a/alignment/files/ReaderAgglomerate.hpp
+++ b/alignment/files/ReaderAgglomerate.hpp
@@ -24,8 +24,7 @@
 #include "../query/PbiFilterZmwGroupQuery.h"
 #include <pbbam/BamRecord.h>
 // the following added to support Polymerase read for unrolled mode
-#include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
-#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/ZmwReadStitcher.h>                     // new interface
 #endif
 
 class ReaderAgglomerate : public BaseSequenceIO {
@@ -139,8 +138,8 @@ public:
   PacBio::BAM::PbiFilterZmwGroupQuery * pbiFilterZmwQueryPtr;
   PacBio::BAM::PbiFilterZmwGroupQuery::iterator pbiFilterZmwIterator;
   // the following to added to support Polymerase reads in unrolled mode
-  PacBio::BAM::VirtualPolymeraseReader * VPReader;
-  PacBio::BAM::VirtualPolymeraseCompositeReader * VPCReader;
+  PacBio::BAM::ZmwReadStitcher            * VPReader;           // new interface
+  PacBio::BAM::ZmwReadStitcher            * VPCReader;          // new interface
 #endif
 };
 
diff --git a/alignment/format/BAMPrinter.hpp b/alignment/format/BAMPrinter.hpp
index a0d9be7..1962a8a 100644
--- a/alignment/format/BAMPrinter.hpp
+++ b/alignment/format/BAMPrinter.hpp
@@ -21,7 +21,7 @@ namespace BAMOutput {
 template<typename T_Sequence>
 void PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read,
         T_Sequence & subread,
-        PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context, 
+        PacBio::BAM::IRecordWriter &bamWriter, AlignmentContext &context, 
         SupplementalQVList & qvList, Clipping clipping, 
         bool cigarUseSeqMatch=false, const bool allowAdjacentIndels=true);
 }
diff --git a/alignment/format/BAMPrinterImpl.hpp b/alignment/format/BAMPrinterImpl.hpp
index 26103e2..95c332a 100644
--- a/alignment/format/BAMPrinterImpl.hpp
+++ b/alignment/format/BAMPrinterImpl.hpp
@@ -156,7 +156,7 @@ void AlignmentToBamRecord(T_AlignmentCandidate & alignment,
 
 template<typename T_Sequence>
 void BAMOutput::PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read, T_Sequence & subread,
-        PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context, 
+        PacBio::BAM::IRecordWriter &bamWriter, AlignmentContext &context, 
         SupplementalQVList & qvList, Clipping clipping,
         bool cigarUseSeqMatch, const bool allowAdjacentIndels) {
 
diff --git a/alignment/format/SAMHeaderPrinter.cpp b/alignment/format/SAMHeaderPrinter.cpp
index f5674bd..866a24c 100644
--- a/alignment/format/SAMHeaderPrinter.cpp
+++ b/alignment/format/SAMHeaderPrinter.cpp
@@ -345,42 +345,32 @@ SAMHeaderRGs SAMHeaderPrinter::MakeRGs(const std::vector<std::string> & readsFil
         delete reader;
     } else {
 #ifdef USE_PBBAM
-        if (fileType == FileType::PBBAM) {
-        // TODO: use Derek's API to merge bamHeaders from different files when 
-        // it is in place. Use the following code for now. 
-        std::vector<std::string>::const_iterator rfit;
-        for(rfit = readsFiles.begin(); rfit != readsFiles.end(); rfit++) {
-            try {
-                PacBio::BAM::BamFile bamFile(*rfit);
-                PacBio::BAM::BamHeader header = bamFile.Header();
-                // Get read groups from bam header.
-                std::vector<PacBio::BAM::ReadGroupInfo> vrgs = header.ReadGroups();
-                std::vector<PacBio::BAM::ReadGroupInfo>::iterator rgit;
-                for (rgit = vrgs.begin(); rgit != vrgs.end(); rgit++) {
-                    rgs.Add(SAMHeaderRG((*rgit).ToSam()));
-                }
-            } catch (std::exception e) {
-                cout << "ERROR, unable to open bam file " << (*rfit) << endl;
-                exit(1);
-            }
-        }
-        } else if (fileType == FileType::PBDATASET) {
+        if (fileType == FileType::PBDATASET or fileType == FileType::PBBAM) {
+            // use + operator to merge headers
+            bool first = true;
+            PacBio::BAM::BamHeader mergedHeader;
+            //
+            // First stage : merge headers - loop thru all file headers and create a merged header
+            //
             for (auto xmlfn: readsFiles) {
                 for (auto bamFile: PacBio::BAM::DataSet(xmlfn).BamFiles()) {
-                    for (auto rg: bamFile.Header().ReadGroups())
-                    {
-                        if (readType == ReadType::POLYMERASE) {
-                            // fix for 27505
-                            rg.ReadType("POLYMERASE");
-                            rg.Id(rg.MovieName(),rg.ReadType());
-                            rgs.Add(SAMHeaderRG(rg.ToSam()));
-                        }
-                        else {
-                            // For later, Investigate why no ReadType is used for REG and CCS
-                            rgs.Add(SAMHeaderRG(rg.ToSam()));
-                        }
-                    }
+                    if (first) {
+                        mergedHeader = bamFile.Header();   // assign first header to mergedHeader
+                        first = false;
+                    } else
+                        mergedHeader += bamFile.Header();  // add subsequent headers and to mergedHeader
+                }
+            }
+
+            //
+            // Second stage : Loop thru all read groups, must modify ReadType if POLYMERASE
+            //
+            for (PacBio::BAM::ReadGroupInfo rg : mergedHeader.ReadGroups()) {
+                if (readType == ReadType::POLYMERASE) {
+                    rg.ReadType("POLYMERASE");
+                    rg.Id(rg.MovieName(), rg.ReadType());
                 }
+                rgs.Add(SAMHeaderRG(rg.ToSam()));
             }
         }
 #else
diff --git a/hdf/HDFZMWWriter.cpp b/hdf/HDFZMWWriter.cpp
index e94413e..8f36f93 100644
--- a/hdf/HDFZMWWriter.cpp
+++ b/hdf/HDFZMWWriter.cpp
@@ -68,8 +68,8 @@ bool HDFZMWWriter::WriteOneZmw(const PacBio::BAM::BamRecord & read) {
 
     uint32_t hn_ = read.HoleNumber();
     _WriteHoleNumber(hn_);
-    _WriteHoleXY(static_cast<int16_t>(hn_ & 0x0000FFFF), 
-                 static_cast<int16_t>(hn_ >> 16));
+    _WriteHoleXY(static_cast<int16_t>(hn_ >> 16), 
+                 static_cast<int16_t>(hn_ & 0x0000FFFF));
     _WriteHoleStatus(PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw);
     _WriteBaseLineSigma(read);
     return Errors().empty();
diff --git a/pbdata/SMRTSequence.cpp b/pbdata/SMRTSequence.cpp
index 18756ee..ee1d3d9 100644
--- a/pbdata/SMRTSequence.cpp
+++ b/pbdata/SMRTSequence.cpp
@@ -467,8 +467,8 @@ void SMRTSequence::Copy(const PacBio::BAM::BamRecord & record,
     this->HoleNumber(hn).
     // Assumption: holeStatus of a bam record must be 'SEQUENCING'
           HoleStatus(static_cast<unsigned char> (PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw)).
-    // x = lower 16 bit, y = upper 16 bit
-          HoleXY(hn & 0x0000FFFF, hn >> 16);
+    // x = upper 16 bit, y = lower 16 bit
+          HoleXY(hn >> 16, hn & 0x0000FFFF);
 
     // Set hq region read score
     if (record.HasReadAccuracy()) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbseqlib.git