[med-svn] [pbseqlib] 01/04: Imported Upstream version 0~20161219
Afif Elghraoui
afif at moszumanska.debian.org
Tue Dec 20 09:01:09 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository pbseqlib.
commit 849e1e0c48d4426de32561e5840a4c13d39b5f9a
Author: Afif Elghraoui <afif at debian.org>
Date: Mon Dec 19 23:46:45 2016 -0800
Imported Upstream version 0~20161219
---
.../algorithms/sorting/LightweightSuffixArray.cpp | 10 ++--
alignment/files/ReaderAgglomerate.cpp | 8 ++--
alignment/files/ReaderAgglomerate.hpp | 7 ++-
alignment/format/BAMPrinter.hpp | 2 +-
alignment/format/BAMPrinterImpl.hpp | 2 +-
alignment/format/SAMHeaderPrinter.cpp | 56 +++++++++-------------
hdf/HDFZMWWriter.cpp | 4 +-
pbdata/SMRTSequence.cpp | 4 +-
8 files changed, 41 insertions(+), 52 deletions(-)
diff --git a/alignment/algorithms/sorting/LightweightSuffixArray.cpp b/alignment/algorithms/sorting/LightweightSuffixArray.cpp
index cb881ed..bc57ecf 100644
--- a/alignment/algorithms/sorting/LightweightSuffixArray.cpp
+++ b/alignment/algorithms/sorting/LightweightSuffixArray.cpp
@@ -286,7 +286,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
}
}
UInt dSetSize = dIndex;
- std::cerr << "Sorting " << diffCoverSize << "-prefixes of the genome." << std::endl;
+ std::cout << "Sorting " << diffCoverSize << "-prefixes of the genome." << std::endl;
MediankeyBoundedQuicksort(text, index, dIndex, 0, dSetSize, 0, diffCoverSize);
UInt i;
@@ -300,7 +300,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
DiffCoverMu mu;
mu.Initialize(diffCover, diffCoverLength, diffCoverSize, textLength);
UInt largestLexName;
- std::cerr << "Enumerating " << diffCoverSize << "-prefixes." << std::endl;
+ std::cout << "Enumerating " << diffCoverSize << "-prefixes." << std::endl;
largestLexName = DiffCoverBuildLexNaming(text, textLength,
index, dSetSize, diffCoverLength, diffCoverSize,
mu.diffCoverReverseLookup, lexVNaming);
@@ -348,7 +348,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
// [0,n-v], the relative order of the suffixes starting at
// i+\delta(,j) and j+\delta(i,j) is already known.
//
- std::cerr << "Sorting suffices." << std::endl;
+ std::cout << "Sorting suffices." << std::endl;
// Step 2.1 v-order suffices using multikey quicksort
for (i = 0; i < textLength; i++ ){
index[i] = i;
@@ -369,14 +369,14 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
lOrderComparator.diffCoverReverseLookup = mu.diffCoverReverseLookup;
UInt setBegin, setEnd;
setBegin = setEnd = 0;
- std::cerr << "Sorting buckets." << std::endl;
+ std::cout << "Sorting buckets." << std::endl;
int percentDone = 0;
int curPercentage = 0;
while(setBegin < textLength) {
setEnd = setBegin;
percentDone = (int)(((1.0*setBegin) / textLength) * 100);
if ( percentDone > curPercentage) {
- std::cerr << " " << percentDone << "% of buckets sorted." << std::endl;
+ std::cout << " " << percentDone << "% of buckets sorted." << std::endl;
curPercentage = percentDone;
}
while(setEnd < textLength and
diff --git a/alignment/files/ReaderAgglomerate.cpp b/alignment/files/ReaderAgglomerate.cpp
index 26d29b3..d9ef711 100644
--- a/alignment/files/ReaderAgglomerate.cpp
+++ b/alignment/files/ReaderAgglomerate.cpp
@@ -253,7 +253,7 @@ int ReaderAgglomerate::Initialize(bool unrolled_mode) {
if (unrolled) {
if (fileType == FileType::PBBAM) {
// Handle PBBAM here , use scrapFileName
- VPReader = new PacBio::BAM::VirtualPolymeraseReader(fileName, scrapsFileName);
+ VPReader = new PacBio::BAM::ZmwReadStitcher(fileName, scrapsFileName);
assert(VPReader != nullptr);
//
} else if (fileType == FileType::PBDATASET) {
@@ -261,7 +261,7 @@ int ReaderAgglomerate::Initialize(bool unrolled_mode) {
// No need in setting filters for PolymeraseReads
// prefiltering, in a form it is currently implemented migght crate Polymerase reads
// with skipped subreads, which defies the whole purpose of unrolled mode
- VPCReader = new PacBio::BAM::VirtualPolymeraseCompositeReader(*dataSetPtr);
+ VPCReader = new PacBio::BAM::ZmwReadStitcher(*dataSetPtr);
assert(VPCReader != nullptr);
}
}
@@ -506,7 +506,7 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) {
if ( VPCReader->HasNext() ) {
// TODO check for length mismatch (as temporary fix)
- PacBio::BAM::VirtualPolymeraseBamRecord record = VPCReader->Next();
+ PacBio::BAM::VirtualZmwBamRecord record = VPCReader->Next();
numRecords = 1; // a single record only
seq.Copy(record); // need to copy into seq
@@ -525,7 +525,7 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) {
if ( VPReader->HasNext() ) {
// TODO check for length mismatch (as temporary fix)
- PacBio::BAM::VirtualPolymeraseBamRecord record = VPReader->Next();
+ PacBio::BAM::VirtualZmwBamRecord record = VPReader->Next();
numRecords = 1; // a single record only
seq.Copy(record); // need to copy into seq
diff --git a/alignment/files/ReaderAgglomerate.hpp b/alignment/files/ReaderAgglomerate.hpp
index 36fd83c..9db65d3 100644
--- a/alignment/files/ReaderAgglomerate.hpp
+++ b/alignment/files/ReaderAgglomerate.hpp
@@ -24,8 +24,7 @@
#include "../query/PbiFilterZmwGroupQuery.h"
#include <pbbam/BamRecord.h>
// the following added to support Polymerase read for unrolled mode
-#include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
-#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/ZmwReadStitcher.h> // new interface
#endif
class ReaderAgglomerate : public BaseSequenceIO {
@@ -139,8 +138,8 @@ public:
PacBio::BAM::PbiFilterZmwGroupQuery * pbiFilterZmwQueryPtr;
PacBio::BAM::PbiFilterZmwGroupQuery::iterator pbiFilterZmwIterator;
// the following to added to support Polymerase reads in unrolled mode
- PacBio::BAM::VirtualPolymeraseReader * VPReader;
- PacBio::BAM::VirtualPolymeraseCompositeReader * VPCReader;
+ PacBio::BAM::ZmwReadStitcher * VPReader; // new interface
+ PacBio::BAM::ZmwReadStitcher * VPCReader; // new interface
#endif
};
diff --git a/alignment/format/BAMPrinter.hpp b/alignment/format/BAMPrinter.hpp
index a0d9be7..1962a8a 100644
--- a/alignment/format/BAMPrinter.hpp
+++ b/alignment/format/BAMPrinter.hpp
@@ -21,7 +21,7 @@ namespace BAMOutput {
template<typename T_Sequence>
void PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read,
T_Sequence & subread,
- PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context,
+ PacBio::BAM::IRecordWriter &bamWriter, AlignmentContext &context,
SupplementalQVList & qvList, Clipping clipping,
bool cigarUseSeqMatch=false, const bool allowAdjacentIndels=true);
}
diff --git a/alignment/format/BAMPrinterImpl.hpp b/alignment/format/BAMPrinterImpl.hpp
index 26103e2..95c332a 100644
--- a/alignment/format/BAMPrinterImpl.hpp
+++ b/alignment/format/BAMPrinterImpl.hpp
@@ -156,7 +156,7 @@ void AlignmentToBamRecord(T_AlignmentCandidate & alignment,
template<typename T_Sequence>
void BAMOutput::PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read, T_Sequence & subread,
- PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context,
+ PacBio::BAM::IRecordWriter &bamWriter, AlignmentContext &context,
SupplementalQVList & qvList, Clipping clipping,
bool cigarUseSeqMatch, const bool allowAdjacentIndels) {
diff --git a/alignment/format/SAMHeaderPrinter.cpp b/alignment/format/SAMHeaderPrinter.cpp
index f5674bd..866a24c 100644
--- a/alignment/format/SAMHeaderPrinter.cpp
+++ b/alignment/format/SAMHeaderPrinter.cpp
@@ -345,42 +345,32 @@ SAMHeaderRGs SAMHeaderPrinter::MakeRGs(const std::vector<std::string> & readsFil
delete reader;
} else {
#ifdef USE_PBBAM
- if (fileType == FileType::PBBAM) {
- // TODO: use Derek's API to merge bamHeaders from different files when
- // it is in place. Use the following code for now.
- std::vector<std::string>::const_iterator rfit;
- for(rfit = readsFiles.begin(); rfit != readsFiles.end(); rfit++) {
- try {
- PacBio::BAM::BamFile bamFile(*rfit);
- PacBio::BAM::BamHeader header = bamFile.Header();
- // Get read groups from bam header.
- std::vector<PacBio::BAM::ReadGroupInfo> vrgs = header.ReadGroups();
- std::vector<PacBio::BAM::ReadGroupInfo>::iterator rgit;
- for (rgit = vrgs.begin(); rgit != vrgs.end(); rgit++) {
- rgs.Add(SAMHeaderRG((*rgit).ToSam()));
- }
- } catch (std::exception e) {
- cout << "ERROR, unable to open bam file " << (*rfit) << endl;
- exit(1);
- }
- }
- } else if (fileType == FileType::PBDATASET) {
+ if (fileType == FileType::PBDATASET or fileType == FileType::PBBAM) {
+ // use + operator to merge headers
+ bool first = true;
+ PacBio::BAM::BamHeader mergedHeader;
+ //
+ // First stage : merge headers - loop thru all file headers and create a merged header
+ //
for (auto xmlfn: readsFiles) {
for (auto bamFile: PacBio::BAM::DataSet(xmlfn).BamFiles()) {
- for (auto rg: bamFile.Header().ReadGroups())
- {
- if (readType == ReadType::POLYMERASE) {
- // fix for 27505
- rg.ReadType("POLYMERASE");
- rg.Id(rg.MovieName(),rg.ReadType());
- rgs.Add(SAMHeaderRG(rg.ToSam()));
- }
- else {
- // For later, Investigate why no ReadType is used for REG and CCS
- rgs.Add(SAMHeaderRG(rg.ToSam()));
- }
- }
+ if (first) {
+ mergedHeader = bamFile.Header(); // assign first header to mergedHeader
+ first = false;
+ } else
+ mergedHeader += bamFile.Header(); // add subsequent headers and to mergedHeader
+ }
+ }
+
+ //
+ // Second stage : Loop thru all read groups, must modify ReadType if POLYMERASE
+ //
+ for (PacBio::BAM::ReadGroupInfo rg : mergedHeader.ReadGroups()) {
+ if (readType == ReadType::POLYMERASE) {
+ rg.ReadType("POLYMERASE");
+ rg.Id(rg.MovieName(), rg.ReadType());
}
+ rgs.Add(SAMHeaderRG(rg.ToSam()));
}
}
#else
diff --git a/hdf/HDFZMWWriter.cpp b/hdf/HDFZMWWriter.cpp
index e94413e..8f36f93 100644
--- a/hdf/HDFZMWWriter.cpp
+++ b/hdf/HDFZMWWriter.cpp
@@ -68,8 +68,8 @@ bool HDFZMWWriter::WriteOneZmw(const PacBio::BAM::BamRecord & read) {
uint32_t hn_ = read.HoleNumber();
_WriteHoleNumber(hn_);
- _WriteHoleXY(static_cast<int16_t>(hn_ & 0x0000FFFF),
- static_cast<int16_t>(hn_ >> 16));
+ _WriteHoleXY(static_cast<int16_t>(hn_ >> 16),
+ static_cast<int16_t>(hn_ & 0x0000FFFF));
_WriteHoleStatus(PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw);
_WriteBaseLineSigma(read);
return Errors().empty();
diff --git a/pbdata/SMRTSequence.cpp b/pbdata/SMRTSequence.cpp
index 18756ee..ee1d3d9 100644
--- a/pbdata/SMRTSequence.cpp
+++ b/pbdata/SMRTSequence.cpp
@@ -467,8 +467,8 @@ void SMRTSequence::Copy(const PacBio::BAM::BamRecord & record,
this->HoleNumber(hn).
// Assumption: holeStatus of a bam record must be 'SEQUENCING'
HoleStatus(static_cast<unsigned char> (PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw)).
- // x = lower 16 bit, y = upper 16 bit
- HoleXY(hn & 0x0000FFFF, hn >> 16);
+ // x = upper 16 bit, y = lower 16 bit
+ HoleXY(hn >> 16, hn & 0x0000FFFF);
// Set hq region read score
if (record.HasReadAccuracy()) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbseqlib.git
More information about the debian-med-commit
mailing list